Kafka: exception on restart with manually managed offsets
Problem description:
The Kafka topics each have 4 partitions.
Offsets are saved in Redis.
As soon as Redis holds an offset for partition 2 of the DWD_ORDER_INFO topic, the job throws an exception on restart.
After deleting the Redis key the job runs fine, but the next restart fails again.
ERROR (org.apache.spark.streaming.StreamingContext:logError) - Error starting the context, marking it as stopped
java.lang.IllegalStateException: No current assignment for partition DWD_ORDER_INFO-2
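(For context: MykafkaUtil is not shown in the post. Below is a plausible minimal sketch of its two overloads on spark-streaming-kafka-0-10; the broker address and kafkaParams are assumptions, not the actual helper. The relevant point is that the offsets overload uses ConsumerStrategies.Subscribe with a TopicPartition map, which is where the seek() in the stack trace happens.)

import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka010.{ConsumerStrategies, KafkaUtils, LocationStrategies}

object MykafkaUtil {
  // hypothetical params -- broker list and settings are assumed, not from the post
  private def kafkaParams(groupId: String): Map[String, Object] = Map(
    "bootstrap.servers" -> "broker1:9092",
    "key.deserializer" -> classOf[org.apache.kafka.common.serialization.StringDeserializer],
    "value.deserializer" -> classOf[org.apache.kafka.common.serialization.StringDeserializer],
    "group.id" -> groupId,
    "enable.auto.commit" -> (false: java.lang.Boolean)
  )

  // no saved offsets: start from wherever the group / auto.offset.reset says
  def getKafkaStream(topic: String, ssc: StreamingContext, groupId: String): InputDStream[ConsumerRecord[String, String]] =
    KafkaUtils.createDirectStream(ssc, LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Seq(topic), kafkaParams(groupId)))

  // with saved offsets: on start, Subscribe seeks every TopicPartition in the map,
  // so each key has to belong to the subscribed topic
  def getKafkaStream(topic: String, ssc: StreamingContext, offsets: Map[TopicPartition, Long], groupId: String): InputDStream[ConsumerRecord[String, String]] =
    KafkaUtils.createDirectStream(ssc, LocationStrategies.PreferConsistent,
      ConsumerStrategies.Subscribe[String, String](Seq(topic), kafkaParams(groupId), offsets))
}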
// imports used by the snippets (RedisUtil / OffsetManager / MykafkaUtil are the poster's own helpers)
import java.util
import org.apache.kafka.clients.consumer.ConsumerRecord
import org.apache.kafka.common.TopicPartition
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka010.{HasOffsetRanges, OffsetRange}
import redis.clients.jedis.Jedis

// merge the two streams (each topic has 4 partitions)
val topicOrderInfo = "DWD_ORDER_INFO"
val groupIdOrderInfo = "dws_order_info_group"
// fetch the saved offsets for this topic/group
val kafkaOffsetOrderinfo: Map[TopicPartition, Long] = OffsetManager.getOffset(topicOrderInfo, groupIdOrderInfo)
var recordInputStreamOrderInfo: InputDStream[ConsumerRecord[String, String]] = null
if (kafkaOffsetOrderinfo != null && kafkaOffsetOrderinfo.size > 0) {
  recordInputStreamOrderInfo = MykafkaUtil.getKafkaStream(topicOrderInfo, ssc, kafkaOffsetOrderinfo, groupIdOrderInfo)
} else {
  recordInputStreamOrderInfo = MykafkaUtil.getKafkaStream(topicOrderInfo, ssc, groupIdOrderInfo)
}
// capture each batch's OffsetRanges on the driver
var offsetRangesOrderinfo: Array[OffsetRange] = Array.empty[OffsetRange]
val recordInputDSOrderinfo: DStream[ConsumerRecord[String, String]] = recordInputStreamOrderInfo.transform { rdd =>
  offsetRangesOrderinfo = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
  rdd
}
val topicOrderDetail = "DWD_ORDER_DETAIL"
val groupIdOrderDetail = "dws_order_detail_group"
// fetch the saved offsets for this topic/group
val kafkaOffsetOrderDetail: Map[TopicPartition, Long] = OffsetManager.getOffset(topicOrderDetail, groupIdOrderDetail)
var recordInputStreamOrderDetail: InputDStream[ConsumerRecord[String, String]] = null
if (kafkaOffsetOrderinfo != null && kafkaOffsetOrderinfo.size > 0) {
  recordInputStreamOrderDetail = MykafkaUtil.getKafkaStream(topicOrderDetail, ssc, kafkaOffsetOrderinfo, groupIdOrderDetail)
} else {
  recordInputStreamOrderDetail = MykafkaUtil.getKafkaStream(topicOrderDetail, ssc, groupIdOrderDetail)
}
// capture each batch's OffsetRanges on the driver
var offsetRangesOrderDetail: Array[OffsetRange] = Array.empty[OffsetRange]
val recordInputDSOrderDetail: DStream[ConsumerRecord[String, String]] = recordInputStreamOrderDetail.transform { rdd =>
  offsetRangesOrderDetail = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
  rdd
}
// save the offsets
OffsetManager.saveOffset(topicOrderInfo, groupIdOrderInfo, offsetRangesOrderinfo)
OffsetManager.saveOffset(topicOrderDetail, groupIdOrderDetail, offsetRangesOrderDetail)
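A side note on where these saveOffset calls sit: as pasted, they execute once while the DStream graph is being assembled, before any batch has run (unless a surrounding foreachRDD was trimmed from the snippet; the fact that Redis does hold offsets suggests the real job saves them per batch). The pattern in the Spark Streaming + Kafka integration guide is to persist offsets per batch, after that batch's output action; a minimal sketch with the names above:

// Sketch: save each topic's offsets once per batch, after the batch's output succeeds.
recordInputDSOrderinfo.foreachRDD { rdd =>
  // ... output action for the batch goes first ...
  OffsetManager.saveOffset(topicOrderInfo, groupIdOrderInfo, offsetRangesOrderinfo)
}
recordInputDSOrderDetail.foreachRDD { rdd =>
  OffsetManager.saveOffset(topicOrderDetail, groupIdOrderDetail, offsetRangesOrderDetail)
}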
// read the saved offsets back from Redis
def getOffset(topicName: String, groupId: String): Map[TopicPartition, Long] = {
  // Redis layout -- type: hash, key: "offset:[topic]:[groupId]", field: partition id, value: offset, plus an intended expire
  val client: Jedis = RedisUtil.getJedisClient
  val offsetKey: String = "offset:" + topicName + ":" + groupId
  val offsetMap: util.Map[String, String] = client.hgetAll(offsetKey)
  client.close()
  import scala.collection.JavaConversions._
  val kafkaOffsetMap: Map[TopicPartition, Long] = offsetMap.map {
    case (partitionId, offset) =>
      println("Loading partition offset: " + partitionId + ":" + offset)
      (new TopicPartition(topicName, partitionId.toInt), offset.toLong)
  }.toMap
  kafkaOffsetMap
}
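Worth noticing in getOffset: every key is new TopicPartition(topicName, ...), so the map it returns is tied to the topic it was loaded for, and ConsumerStrategies.Subscribe seeks all of those partitions on the consumer it starts. A consumer subscribed to one topic cannot seek a partition of another topic; that is exactly the reported IllegalStateException. A standalone illustration with the plain consumer API (broker address is assumed):

import java.util.{Collections, Properties}
import org.apache.kafka.clients.consumer.{ConsumerConfig, KafkaConsumer}
import org.apache.kafka.common.TopicPartition

val props = new Properties()
props.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "broker1:9092") // assumed broker
props.put(ConsumerConfig.GROUP_ID_CONFIG, "dws_order_detail_group")
props.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")
props.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, "org.apache.kafka.common.serialization.StringDeserializer")

val consumer = new KafkaConsumer[String, String](props)
consumer.subscribe(Collections.singletonList("DWD_ORDER_DETAIL"))
consumer.poll(100) // poll once so the group assignment can happen
// this partition belongs to a topic the consumer is not subscribed to:
consumer.seek(new TopicPartition("DWD_ORDER_INFO", 2), 127L)
// => java.lang.IllegalStateException: No current assignment for partition DWD_ORDER_INFO-2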
// save offsets to Redis
def saveOffset(topicName: String, groupId: String, offsetRanges: Array[OffsetRange]): Unit = {
  if (offsetRanges != null && offsetRanges.size > 0) {
    val offsetKey: String = "offset:" + topicName + ":" + groupId
    import scala.collection.JavaConversions._
    val offsetMap: Map[String, String] = offsetRanges.map {
      offsetRange =>
        println("Writing offset: " + offsetRange.partition + ":" + offsetRange.fromOffset + "->" + offsetRange.untilOffset)
        (offsetRange.partition + "", offsetRange.untilOffset + "")
    }.toMap
    val client: Jedis = RedisUtil.getJedisClient
    client.hmset(offsetKey, offsetMap)
    client.close()
  }
}
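Incidentally, the layout comment in getOffset mentions an expire, but hmset by itself never sets a TTL; if the hash is meant to age out, an explicit expire call is needed. A sketch for the tail of saveOffset (the 7-day TTL is an arbitrary assumption):

val client: Jedis = RedisUtil.getJedisClient
client.hmset(offsetKey, offsetMap)
client.expire(offsetKey, 7 * 24 * 3600) // assumed TTL; hmset alone never expires the key
client.close()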
Exception info:
Loading partition offset: 0:128
Loading partition offset: 1:129
Loading partition offset: 2:127
Loading partition offset: 3:134
Loading partition offset: 0:152
Loading partition offset: 1:144
Loading partition offset: 2:154
Loading partition offset: 3:161
2021-04-09 21:46:10 ERROR (org.apache.spark.streaming.StreamingContext:logError) - Error starting the context, marking it as stopped
java.lang.IllegalStateException: No current assignment for partition DWD_ORDER_INFO-2
at org.apache.kafka.clients.consumer.internals.SubscriptionState.assignedState(SubscriptionState.java:269)
at org.apache.kafka.clients.consumer.internals.SubscriptionState.seek(SubscriptionState.java:294)
at org.apache.kafka.clients.consumer.KafkaConsumer.seek(KafkaConsumer.java:1331)
at org.apache.spark.streaming.kafka010.Subscribe$$anonfun$onStart$2.apply(ConsumerStrategy.scala:107)
at org.apache.spark.streaming.kafka010.Subscribe$$anonfun$onStart$2.apply(ConsumerStrategy.scala:106)
at scala.collection.Iterator$class.foreach(Iterator.scala:893)
at scala.collection.AbstractIterator.foreach(Iterator.scala:1336)
at scala.collection.IterableLike$class.foreach(IterableLike.scala:72)
at scala.collection.AbstractIterable.foreach(Iterable.scala:54)
at org.apache.spark.streaming.kafka010.Subscribe.onStart(ConsumerStrategy.scala:106)
at org.apache.spark.streaming.kafka010.DirectKafkaInputDStream.consumer(DirectKafkaInputDStream.scala:73)
at org.apache.spark.streaming.kafka010.DirectKafkaInputDStream.start(DirectKafkaInputDStream.scala:259)
at org.apache.spark.streaming.DStreamGraph$$anonfun$start$7.apply(DStreamGraph.scala:54)
at org.apache.spark.streaming.DStreamGraph$$anonfun$start$7.apply(DStreamGraph.scala:54)
at scala.collection.parallel.mutable.ParArray$ParArrayIterator.foreach_quick(ParArray.scala:143)
at scala.collection.parallel.mutable.ParArray$ParArrayIterator.foreach(ParArray.scala:136)
at scala.collection.parallel.ParIterableLike$Foreach.leaf(ParIterableLike.scala:972)
at scala.collection.parallel.Task$$anonfun$tryLeaf$1.apply$mcV$sp(Tasks.scala:49)
at scala.collection.parallel.Task$$anonfun$tryLeaf$1.apply(Tasks.scala:48)
at scala.collection.parallel.Task$$anonfun$tryLeaf$1.apply(Tasks.scala:48)
at scala.collection.parallel.Task$class.tryLeaf(Tasks.scala:51)
at scala.collection.parallel.ParIterableLike$Foreach.tryLeaf(ParIterableLike.scala:969)
at scala.collection.parallel.AdaptiveWorkStealingTasks$WrappedTask$class.compute(Tasks.scala:152)
at scala.collection.parallel.AdaptiveWorkStealingForkJoinTasks$WrappedTask.compute(Tasks.scala:443)
at scala.concurrent.forkjoin.RecursiveAction.exec(RecursiveAction.java:160)
at scala.concurrent.forkjoin.ForkJoinTask.doExec(ForkJoinTask.java:260)
at scala.concurrent.forkjoin.ForkJoinPool$WorkQueue.runTask(ForkJoinPool.java:1339)
at scala.concurrent.forkjoin.ForkJoinPool.runWorker(ForkJoinPool.java:1979)
at scala.concurrent.forkjoin.ForkJoinWorkerThread.run(ForkJoinWorkerThread.java:107)
at ... run in separate thread using org.apache.spark.util.ThreadUtils ... ()
at org.apache.spark.streaming.StreamingContext.liftedTree1$1(StreamingContext.scala:578)
at org.apache.spark.streaming.StreamingContext.start(StreamingContext.scala:572)
at com.at.mall.realtime.dws.OrderDetailWideApp$.main(OrderDetailWideApp.scala:255)
at com.at.mall.realtime.dws.OrderDetailWideApp.main(OrderDetailWideApp.scala)
Exception in thread "main" java.lang.IllegalStateException: No current assignment for partition DWD_ORDER_INFO-2