Spark Streaming direct approach: saving offsets to ZooKeeper

  1. Maven dependencies
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-core_2.11</artifactId>
            <scope>provided</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-sql_2.11</artifactId>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming_2.11</artifactId>
            <scope>provided</scope>
        </dependency>

        <dependency>
            <groupId>org.apache.spark</groupId>
            <artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
            <version>2.1.1</version>
        </dependency>
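
Note that the three Spark artifacts above carry no `<version>` tag, so Maven must resolve their versions from a parent POM or a `<dependencyManagement>` section. A minimal sketch of pinning them explicitly, assuming Spark 2.1.1 (the version of the Kafka connector above); the `spark.version` property name is my own:

        <properties>
            <!-- assumed version; adjust to your cluster -->
            <spark.version>2.1.1</spark.version>
        </properties>

        <!-- then, on each Spark dependency: -->
        <version>${spark.version}</version>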

2. Straight to the code

import kafka.common.TopicAndPartition
import kafka.message.MessageAndMetadata
import kafka.serializer.StringDecoder
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.rdd.RDD
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka.KafkaCluster.Err
import org.apache.spark.streaming.kafka.{HasOffsetRanges, KafkaCluster, KafkaUtils, OffsetRange}
import org.apache.spark.streaming.{Seconds, StreamingContext}

import scala.collection.immutable.HashMap

object BIStreamingZK {

  def main(args: Array[String]): Unit = {
    // keep Spark's own logging quiet
    Logger.getLogger("org").setLevel(Level.WARN)

    val sparkConf = new SparkConf().setAppName("bi_stream_analyse_zk")
    // enable backpressure
    sparkConf.set("spark.streaming.backpressure.enabled", "true")

    // per-batch ceiling, assuming 3 partitions: 3 * 5 s * 100 msg/s = 1500 records
    sparkConf.set("spark.streaming.kafka.maxRatePerPartition", "100")
    val ssc = new StreamingContext(sparkConf, Seconds(5))

    // topics to consume
    val topics = Set("topic1", "topic2")
    // consumer group id
    val groupId = "bi_stream_analyse_bill_state_zk"

    val kfkParams = Map[String, String](
      "zookeeper.connect" -> "server1:21810,server2:21810,server3:21810",
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "kafkaserver1:9092,kafkaserver2:9092,kafkaserver3:9092",
      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "largest", // 0.8-era values: "largest"/"smallest"
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringDeserializer",
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> "org.apache.kafka.common.serialization.StringDeserializer",
      ConsumerConfig.GROUP_ID_CONFIG -> groupId
    )

    // KafkaCluster does the offset bookkeeping against ZooKeeper
    val kafkaCluster = new KafkaCluster(kfkParams)

    // recover the last committed offsets
    val fromOffset: Map[TopicAndPartition, Long] = getOffset(kafkaCluster, topics, groupId)

    // create the direct stream, starting from the recovered offsets
    val kafkaDStream: InputDStream[String] = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder, String](
      ssc,
      kfkParams,
      fromOffset,
      (message: MessageAndMetadata[String, String]) => message.message())


    // business processing
    kafkaDStream.foreachRDD(rdd => {
      if (!rdd.isEmpty()) {
        dealRdd(rdd)
      }
    })

    // commit the offsets of each batch back to ZooKeeper
    setOffset(kafkaCluster, kafkaDStream, groupId)


    ssc.start()
    ssc.awaitTermination()

  }

  // placeholder for the business-specific processing, which the original post does not show
  def dealRdd(rdd: RDD[String]): Unit = {
    // ... your per-batch logic here ...
  }

  def getOffset(kafkaCluster: KafkaCluster, topics: Set[String], groupId: String): Map[TopicAndPartition, Long] = {
    var partitionToLong = new HashMap[TopicAndPartition, Long]

    // look up every partition of the requested topics
    val topicAndPartitionsEither: Either[Err, Set[TopicAndPartition]] = kafkaCluster.getPartitions(topics)

    // the topics exist
    if (topicAndPartitionsEither.isRight) {

      val topicAndPartitions: Set[TopicAndPartition] = topicAndPartitionsEither.right.get

      val offsetsEither: Either[Err, Map[TopicAndPartition, Long]] =
        kafkaCluster.getConsumerOffsets(groupId, topicAndPartitions)

      if (offsetsEither.isLeft) {
        // the group has never committed an offset: start every partition at 0.
        // If retention has already moved Kafka's earliest offset past 0, reading
        // will fail with OffsetOutOfRangeException; see the sketch after the code.
        for (topicAndPartition <- topicAndPartitions) {
          partitionToLong += (topicAndPartition -> 0L)
        }
      } else {
        // the group has committed before; offsets older than Kafka's retention
        // window (7 days by default) fail the same way
        val value: Map[TopicAndPartition, Long] = offsetsEither.right.get
        partitionToLong ++= value
      }

    }

    partitionToLong
  }

  def setOffset(kafkaCluster: KafkaCluster, kafkaDstream: InputDStream[String], groupId: String): Unit = {

    kafkaDstream.foreachRDD(rdd => {

      var partitionToLong = new HashMap[TopicAndPartition, Long]

      // every RDD produced by a direct stream carries the offset range it read
      // from each partition
      val offsetRanges: Array[OffsetRange] = rdd.asInstanceOf[HasOffsetRanges].offsetRanges

      for (range <- offsetRanges) {
        partitionToLong += (range.topicAndPartition() -> range.untilOffset)
      }

      // persist the end offset of this batch to ZooKeeper
      kafkaCluster.setConsumerOffsets(groupId, partitionToLong)

    })

  }



}
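
The "see the sketch after the code" comments in getOffset point at a real failure mode: if a stored offset (or the 0 used for a brand-new group) has already fallen behind Kafka's earliest retained offset, the direct stream throws an OffsetOutOfRangeException as soon as it starts reading. The original post does not show the fix; below is a minimal sketch of one common approach using the same KafkaCluster API, clamping every starting offset up to the earliest offset still available on the partition leader. The helper name clampToEarliest is my own:

    // Hypothetical helper, not part of the original post
    def clampToEarliest(kafkaCluster: KafkaCluster,
                        fromOffset: Map[TopicAndPartition, Long]): Map[TopicAndPartition, Long] = {
      val earliestEither = kafkaCluster.getEarliestLeaderOffsets(fromOffset.keySet)
      if (earliestEither.isLeft) {
        fromOffset // leaders unreachable: fall back to the stored offsets
      } else {
        val earliest = earliestEither.right.get
        fromOffset.map { case (tp, offset) =>
          // LeaderOffset.offset is the smallest offset Kafka still retains for tp
          tp -> math.max(offset, earliest(tp).offset)
        }
      }
    }

Call it on the result of getOffset before building the stream, e.g. val fromOffset = clampToEarliest(kafkaCluster, getOffset(kafkaCluster, topics, groupId)). One more design note: because the offsets are committed in a second foreachRDD, they can be written even for a batch whose processing failed; committing inside the same foreachRDD, immediately after dealRdd, is the usual way to get at-least-once semantics.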

 
