Spark code for consuming from Kafka (direct stream with offsets saved in ZooKeeper)

import kafka.common.TopicAndPartition
import kafka.message.MessageAndMetadata
import kafka.serializer.Decoder
import org.apache.spark.SparkException
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.StreamingContext
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka.KafkaCluster.LeaderOffset
import org.apache.spark.streaming.kafka.{HasOffsetRanges, KafkaCluster, KafkaUtils}
import scala.reflect.ClassTag


/**
 * Thin wrapper around the Kafka 0.8 direct-stream API that persists consumer
 * offsets via KafkaCluster, so a restarted job resumes where it left off.
 */
class MyKafkaClient(val kafkaParams: Map[String, String]) extends Serializable {

  private val kc = new KafkaCluster(kafkaParams)

  def createDirectStream[K: ClassTag, V: ClassTag, KD <: Decoder[K]: ClassTag, VD <: Decoder[V]: ClassTag](
      ssc: StreamingContext, topic: String): InputDStream[(K, V)] = {
    val partitionsEither = kc.getPartitions(Set(topic))
    if (partitionsEither.isLeft) throw new SparkException(s"get kafka partition failed: ${partitionsEither.left.get}")

    val partitions = partitionsEither.right.get
    val groupId = kafkaParams("group.id")

    // Validate (and, if necessary, repair) the stored offsets before the stream starts.
    val offsets = setOrUpdateOffsets(partitions, groupId)
    KafkaUtils.createDirectStream[K, V, KD, VD, (K, V)](ssc, kafkaParams, offsets,
      (mmd: MessageAndMetadata[K, V]) => (mmd.key, mmd.message))
  }

  /**
   * If the streaming job throws kafka.common.OffsetOutOfRangeException, the offsets
   * saved in ZooKeeper are stale: Kafka's retention policy has already deleted the
   * log segments that contained them. To guard against this, compare the consumer
   * offsets in ZooKeeper with the earliest leader offsets; any consumer offset that
   * is smaller is out of date and gets replaced by the earliest leader offset.
   */
  private def setOrUpdateOffsets(partitions: Set[TopicAndPartition], groupId: String): Map[TopicAndPartition, Long] = {
    val consumerOffsetEither = kc.getConsumerOffsets(groupId, partitions)
    if (consumerOffsetEither.isLeft) {
      // No offsets stored for this group yet: seed them according to auto.offset.reset.
      val reset = kafkaParams.get("auto.offset.reset").map(_.toLowerCase)

      val leaderOffsets: Map[TopicAndPartition, LeaderOffset] =
        if (reset == Some("smallest")) {
          val leaderOffsetsEither = kc.getEarliestLeaderOffsets(partitions)
          if (leaderOffsetsEither.isLeft) throw new SparkException(s"get earliest leader offsets failed: ${leaderOffsetsEither.left.get}")
          leaderOffsetsEither.right.get
        } else {
          val leaderOffsetsEither = kc.getLatestLeaderOffsets(partitions)
          if (leaderOffsetsEither.isLeft) throw new SparkException(s"get latest leader offsets failed: ${leaderOffsetsEither.left.get}")
          leaderOffsetsEither.right.get
        }

      val offsets = leaderOffsets.map {
        case (tp, offset) => (tp, offset.offset)
      }

      // Persist the seed offsets so the next run resumes from a known position.
      kc.setConsumerOffsets(groupId, offsets)
      offsets
    } else {
      // Offsets exist for this group: clamp any that retention has already expired.
      val earliestLeaderOffsetsEither = kc.getEarliestLeaderOffsets(partitions)
      if (earliestLeaderOffsetsEither.isLeft) throw new SparkException(s"get earliest leader offsets failed: ${earliestLeaderOffsetsEither.left.get}")

      val earliestLeaderOffsets = earliestLeaderOffsetsEither.right.get
      val consumerOffsets = consumerOffsetEither.right.get

      // A stored offset older than the earliest available one would trigger
      // OffsetOutOfRangeException, so bump it forward to the earliest leader offset.
      val offsets = consumerOffsets.map { case (tp, n) =>
        val earliestLeaderOffset = earliestLeaderOffsets(tp).offset
        if (n < earliestLeaderOffset) (tp, earliestLeaderOffset) else (tp, n)
      }

      if (offsets.nonEmpty) {
        kc.setConsumerOffsets(groupId, offsets)
      }
      offsets
    }
  }

  /** Commit the end offsets of each partition of `rdd` back to the Kafka cluster. */
  def updateOffsets(rdd: RDD[(String, String)]): Unit = {
    val groupId = kafkaParams("group.id")
    val offsetsList = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
    for (offsets <- offsetsList) {
      val topicAndPartition = TopicAndPartition(offsets.topic, offsets.partition)
      val o = kc.setConsumerOffsets(groupId, Map((topicAndPartition, offsets.untilOffset)))
      if (o.isLeft) {
        println(s"Error updating the offset to Kafka cluster: ${o.left.get}")
      }
    }
  }
}
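
For reference, here is a minimal driver sketch showing how the class above might be wired into a streaming job. The broker list, topic name, group id, and batch interval are placeholders rather than values from the original post; offsets are committed after each batch has been processed, which gives at-least-once semantics.

import kafka.serializer.StringDecoder
import org.apache.spark.SparkConf
import org.apache.spark.streaming.{Seconds, StreamingContext}

object MyKafkaClientDemo {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("MyKafkaClientDemo")
    val ssc = new StreamingContext(conf, Seconds(5))

    // Placeholder connection settings: substitute your own brokers, group, and topic.
    val kafkaParams = Map(
      "metadata.broker.list" -> "broker1:9092,broker2:9092",
      "group.id" -> "demo-group",
      "auto.offset.reset" -> "smallest")

    val client = new MyKafkaClient(kafkaParams)
    val stream = client.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, "demo-topic")

    stream.foreachRDD { rdd =>
      // Process the batch first, then commit its offsets, so that a failed
      // batch is replayed on restart (at-least-once semantics).
      rdd.foreach { case (_, value) => println(value) }
      client.updateOffsets(rdd)
    }

    ssc.start()
    ssc.awaitTermination()
  }
}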

 

Reposted from: https://my.oschina.net/u/778683/blog/1828566
