Saving Kafka offsets to Redis with Spark Streaming's direct approach

With the direct approach, offsets are not tracked in ZooKeeper, so the application has to manage them itself: on startup it reads the last committed offset for each topic/partition from Redis and builds the DStream from there, and after each batch it writes the new untilOffset values back. The full example (Kafka 0.8 direct API):

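This uses the old Kafka 0.8 direct API (package org.apache.spark.streaming.kafka), so the build needs the spark-streaming-kafka-0-8 connector plus a Redis client and a connection pool. A minimal sbt sketch; the versions are illustrative, not from the original post, and should match your own Spark version:

// build.sbt (versions are illustrative)
libraryDependencies ++= Seq(
  "org.apache.spark"   %% "spark-sql"                 % "2.4.8",
  "org.apache.spark"   %% "spark-streaming"           % "2.4.8",
  "org.apache.spark"   %% "spark-streaming-kafka-0-8" % "2.4.8",
  "redis.clients"       % "jedis"                     % "3.7.0",
  "org.apache.commons"  % "commons-pool2"             % "2.11.1"
)

Note that spark-streaming-kafka-0-8 was removed in Spark 3.0, so this code is tied to the 2.x line.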
package bi

import kafka.common.TopicAndPartition
import kafka.message.MessageAndMetadata
import kafka.serializer.StringDecoder
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.spark.streaming.kafka.{HasOffsetRanges, OffsetRange}
import org.apache.log4j.{Level, Logger}
import org.apache.spark.SparkConf
import org.apache.spark.sql.SparkSession
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}
import pool.CreateRedisPoolTest

import scala.collection.JavaConverters._
import scala.util.Try


object PileStateTest {
  def main(args: Array[String]): Unit = {
    Logger.getLogger("org.apache.spark").setLevel(Level.INFO)
    val sparkConf = new SparkConf().setMaster("local[*]").setAppName("streaming")
    val spark = SparkSession.builder().config(sparkConf).getOrCreate()

    val sc = spark.sparkContext
    val ssc = new StreamingContext(sc, Seconds(10))
    val topic = Array("saas-pile-state")
    val groupId = "streaming_test"


    // The 0.8 consumer config uses "smallest"/"largest" rather than
    // the newer "earliest"/"latest" values.
    val kafkaParams = Map[String, String](
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> "172.16.1.187:9092",
      ConsumerConfig.AUTO_OFFSET_RESET_CONFIG -> "largest",
      ConsumerConfig.GROUP_ID_CONFIG -> groupId,
      ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG -> "false"
    )



    // Create the DStream, resuming from Redis offsets when available
    val lines = createStreamingContextRedis(ssc, topic, kafkaParams)

    lines.foreachRDD(rdd => {
      val offsetRanges = rdd.asInstanceOf[HasOffsetRanges].offsetRanges
      println(rdd.count())
      // Note: saveAsTextFile creates a directory and fails if it already
      // exists, so this debug output only works for the first batch.
      rdd.saveAsTextFile("C:\\Users\\nc\\Desktop\\py\\1.txt")
      rdd.foreach(x => println(x._1 + "\t" + x._2))
      // Commit offsets only after the batch has been processed
      // (at-least-once semantics).
      storeOffset(offsetRanges, groupId)
    })



    ssc.start()
    ssc.awaitTermination()
  }


  // Create the direct DStream, either from the offsets stored in Redis
  // or from scratch when no offsets exist yet
  def createStreamingContextRedis(ssc: StreamingContext, topic: Array[String],
                                  kafkaParams: Map[String, String]): InputDStream[(String, String)] = {
    val groupId = kafkaParams("group.id")
    val (fromOffset, flag) = getOffset(topic, groupId)
    val offsetReset = kafkaParams("auto.offset.reset")

    if (flag == 1 && offsetReset.equals("largest")) {
      // Keep the (key, value) tuple shape consistent with the no-offset branch
      val messageHandler = (mmd: MessageAndMetadata[String, String]) => (mmd.key(), mmd.message())
      println("Creating DStream from stored offsets")
      KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder, (String, String)](ssc, kafkaParams, fromOffset, messageHandler)
    } else {
      println("Creating DStream from scratch")
      KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaParams, topic.toSet)
    }
  }

  // Read the stored offset for every topic/partition of this group from Redis
  def getOffset(topics: Array[String], groupId: String): (Map[TopicAndPartition, Long], Int) = {
    val fromOffsets = scala.collection.mutable.Map[TopicAndPartition, Long]()
    val redisPool = CreateRedisPoolTest()
    val jedis = redisPool.borrowObject()
    topics.foreach(topic => {
      val keys = jedis.keys(s"bi_kafka_offset_${groupId}_${topic}*")
      if (!keys.isEmpty) {
        keys.asScala.foreach(key => {
          val offset = jedis.get(key)
          // Key layout: bi_kafka_offset_<groupId>_<topic>_<partition>
          val partition = Try(key.split(s"bi_kafka_offset_${groupId}_${topic}_").apply(1)).getOrElse("0")
          fromOffsets.put(TopicAndPartition(topic, partition.toInt), offset.toLong)
        })
      }
    })
    redisPool.returnObject(jedis)
    // flag = 1 when stored offsets were found, 0 when starting from scratch
    if (fromOffsets.isEmpty) (fromOffsets.toMap, 0) else (fromOffsets.toMap, 1)
  }

  // Persist the end offset of each processed range back to Redis
  def storeOffset(ranges: Array[OffsetRange], groupId: String): Unit = {
    val redisPool = CreateRedisPoolTest()
    val jedis = redisPool.borrowObject()
    for (o <- ranges) {
      val key = s"bi_kafka_offset_${groupId}_${o.topic}_${o.partition}"
      jedis.set(key, o.untilOffset.toString)
    }
    redisPool.returnObject(jedis)
  }

}