We save the Kafka offsets to ZooKeeper. If the job dies for some reason and nobody notices for a while, the offsets stored in ZK can end up pointing at data that Kafka's log retention has already deleted, so the next restart fails with an offset-out-of-range error. The job should therefore validate the stored offsets at startup: if a stored offset falls outside the partition's current [earliest, latest] range, fall back to the earliest available offset; otherwise resume from the stored offset. The ZkKafkaOffsetManager below implements this check.
import java.util.Properties

import kafka.utils.{ZKGroupTopicDirs, ZkUtils}
import org.apache.kafka.clients.CommonClientConfigs
import org.apache.kafka.clients.consumer.KafkaConsumer
import org.apache.kafka.common.TopicPartition
import org.apache.kafka.common.config.SaslConfigs
import org.apache.kafka.common.serialization.StringDeserializer
import org.apache.spark.streaming.kafka010.OffsetRange
import org.slf4j.LoggerFactory

import scala.collection.JavaConverters._
import scala.collection.mutable
import scala.collection.mutable.{ListBuffer, Map}
class ZkKafkaOffsetManager(zkUrl: String) {

  private val logger = LoggerFactory.getLogger(classOf[ZkKafkaOffsetManager])

  private val zkClientAndConn = ZkUtils.createZkClientAndConnection(zkUrl, 30000, 30000)
  private val zkUtils = new ZkUtils(zkClientAndConn._1, zkClientAndConn._2, false)
  /**
   * Reads the offset stored under /consumers/<groupId>/offsets/<topic>/<partition>
   * for every partition and validates it against the partition's current
   * [earliest, latest] range on the brokers. An out-of-range offset (typically
   * expired by log retention) falls back to the earliest available offset.
   */
  def readOffsets(brokerIP: String, offset: String, topics: Seq[String],
                  groupId: String, sasl: String): Map[TopicPartition, Long] = {
    val offsets = mutable.HashMap.empty[TopicPartition, Long]
    val partitionsForTopics = zkUtils.getPartitionsForTopics(topics)
    // Current earliest and latest offsets per partition, fetched from the brokers.
    val beginMap = beginning_off(brokerIP, groupId, offset, topics, sasl)
    val endMap = end_off(brokerIP, groupId, offset, topics, sasl)

    partitionsForTopics.foreach { case (topic, _) =>
      val partitionIds = getPartition(brokerIP, groupId, offset, topic, sasl)
      val groupTopicDirs = new ZKGroupTopicDirs(groupId, topic)
      partitionIds.foreach { partition =>
        val tp = new TopicPartition(topic, partition)
        val path = groupTopicDirs.consumerOffsetDir + "/" + partition
        try {
          if (!zkUtils.pathExists(path)) {
            // First run for this partition: create the ZK node, then start from the
            // earliest or latest offset according to the requested reset policy.
            zkUtils.createPersistentPath(path, 0L.toString)
            if (offset.equals("earliest")) {
              offsets.put(tp, beginMap.getOrElse(tp, 0L))
            } else {
              offsets.put(tp, endMap.getOrElse(tp, 0L))
            }
          } else {
            val data = zkUtils.readData(path)
            if (data != null) {
              val zkOffset = data._1.toLong
              val begin = beginMap.getOrElse(tp, 0L)
              val end = endMap.getOrElse(tp, Long.MaxValue)
              if (zkOffset < begin || zkOffset > end) {
                // The saved offset points at data Kafka has already deleted (or past
                // the log end): restart from the earliest available offset.
                offsets.put(tp, begin)
              } else {
                offsets.put(tp, zkOffset)
              }
              logger.info(
                "Read offset - topic={}, partition={}, offset={}, path={}",
                Seq[AnyRef](topic, partition.toString, data._1, path): _*
              )
            }
          }
        } catch {
          case ex: Exception =>
            // On any ZK/Kafka error, fall back to offset 0 for this partition.
            offsets.put(tp, 0L)
            logger.info(
              "Read offset - not exist: {}, topic={}, partition={}, path={}",
              Seq[AnyRef](ex.getMessage, topic, partition.toString, path): _*
            )
        }
      }
    }
    offsets
  }
  /**
   * Persists the end offset of each processed range to ZK. Call this after a batch
   * has been processed successfully; replaying the last batch on failure gives
   * at-least-once semantics.
   */
  def saveOffsets(offsetRanges: Seq[OffsetRange], groupId: String): Unit = {
    offsetRanges.foreach { range =>
      val groupTopicDirs = new ZKGroupTopicDirs(groupId, range.topic)
      val path = groupTopicDirs.consumerOffsetDir + "/" + range.partition
      zkUtils.updatePersistentPath(path, range.untilOffset.toString)
      logger.info(
        "Save offset - topic={}, partition={}, offset={}, path={}",
        Seq[AnyRef](range.topic, range.partition.toString, range.untilOffset.toString, path): _*
      )
    }
  }
  /** Consumer configuration shared by the lookup helpers below. */
  private def consumerProps(brokerIP: String, groupId: String, offset: String, sasl: String): Properties = {
    val props = new Properties()
    props.put("bootstrap.servers", brokerIP)
    props.put("group.id", groupId)
    props.put("auto.offset.reset", offset)
    // These consumers only look up positions and metadata; they must never commit.
    props.put("enable.auto.commit", "false")
    props.put("key.deserializer", classOf[StringDeserializer])
    props.put("value.deserializer", classOf[StringDeserializer])
    props.put(CommonClientConfigs.SECURITY_PROTOCOL_CONFIG, "SASL_PLAINTEXT")
    props.put(SaslConfigs.SASL_MECHANISM, "PLAIN")
    props.put("sasl.jaas.config", sasl)
    props
  }

  /** Fetches the earliest available offset of every partition of the given topics. */
  def beginning_off(brokerIP: String, groupId: String, offset: String,
                    topics: Iterable[String], sasl: String): mutable.Map[TopicPartition, Long] = {
    val map = mutable.Map[TopicPartition, Long]()
    val consumer = new KafkaConsumer[String, String](consumerProps(brokerIP, groupId, offset, sasl))
    try {
      for (topic <- topics) {
        consumer.partitionsFor(topic).asScala.foreach { t =>
          val tp = new TopicPartition(t.topic, t.partition)
          consumer.assign(Seq(tp).asJava)
          consumer.seekToBeginning(Seq(tp).asJava)
          map += (tp -> consumer.position(tp))
        }
      }
    } finally {
      consumer.close()
    }
    map
  }
  /** Returns the partition ids of the given topic. */
  def getPartition(brokerIP: String, groupId: String, offset: String,
                   topic: String, sasl: String): ListBuffer[Int] = {
    val partitions: ListBuffer[Int] = ListBuffer()
    val consumer = new KafkaConsumer[String, String](consumerProps(brokerIP, groupId, offset, sasl))
    try {
      consumer.partitionsFor(topic).asScala.foreach(info => partitions += info.partition())
    } finally {
      consumer.close()
    }
    partitions
  }
  /** Fetches the latest (log-end) offset of every partition of the given topics. */
  def end_off(brokerIP: String, groupId: String, offset: String,
              topics: Iterable[String], sasl: String): mutable.Map[TopicPartition, Long] = {
    val map = mutable.Map[TopicPartition, Long]()
    val consumer = new KafkaConsumer[String, String](consumerProps(brokerIP, groupId, offset, sasl))
    try {
      for (topic <- topics) {
        consumer.partitionsFor(topic).asScala.foreach { t =>
          val tp = new TopicPartition(t.topic, t.partition)
          consumer.assign(Seq(tp).asJava)
          consumer.seekToEnd(Seq(tp).asJava)
          map += (tp -> consumer.position(tp))
        }
      }
    } finally {
      consumer.close()
    }
    map
  }
}
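
For context, here is how the manager is typically wired into a Spark Streaming job: read and validate the offsets once at startup, start the direct stream from them, and write each batch's end offsets back to ZK after the batch is processed. This is a minimal sketch; ssc, kafkaParams, zkUrl, brokerIP, topics, groupId and saslJaas are assumed to be defined elsewhere in the job and are not part of the class above.

import org.apache.spark.streaming.kafka010.{ConsumerStrategies, HasOffsetRanges, KafkaUtils, LocationStrategies}

val manager = new ZkKafkaOffsetManager(zkUrl)
// Validate the ZK offsets against the brokers' current range before starting.
val fromOffsets = manager.readOffsets(brokerIP, "earliest", topics, groupId, saslJaas)

val stream = KafkaUtils.createDirectStream[String, String](
  ssc,
  LocationStrategies.PreferConsistent,
  ConsumerStrategies.Subscribe[String, String](topics, kafkaParams, fromOffsets)
)

stream.foreachRDD { rdd =>
  val ranges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
  // ... process the batch ...
  // Persist offsets only after the batch succeeds (at-least-once semantics).
  manager.saveOffsets(ranges, groupId)
}

Because the offsets are saved only after a batch has been processed, a crash between processing and saveOffsets replays that batch on restart; downstream writes should be idempotent if exact results matter.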