Consuming Kafka data with Spark Streaming

/*

Import the dependency (Maven):

<dependency>
    <groupId>org.apache.spark</groupId>
    <artifactId>spark-streaming-kafka-0-8_2.11</artifactId>
    <version>2.1.1</version>
</dependency>
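
If the project uses sbt instead of Maven, the equivalent coordinate (added here for reference) would be:

libraryDependencies += "org.apache.spark" %% "spark-streaming-kafka-0-8" % "2.1.1"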

*/

package com.huiyi.deve

import kafka.common.TopicAndPartition
import kafka.message.MessageAndMetadata
import kafka.serializer.StringDecoder
import org.apache.kafka.clients.consumer.ConsumerConfig
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.{DStream, InputDStream}
import org.apache.spark.streaming.kafka.KafkaCluster.Err
import org.apache.spark.streaming.kafka.{HasOffsetRanges, KafkaCluster, KafkaUtils, OffsetRange}
import org.apache.spark.streaming.{Seconds, StreamingContext}

import scala.collection.mutable

object KafkaStreaming {

  def main(args: Array[String]): Unit = {

    // appName/master are set here so the example can run standalone; drop setMaster when submitting to a cluster
    val sparkConf = new SparkConf().setAppName("KafkaStreaming").setMaster("local[*]")
    val ssc = new StreamingContext(sparkConf, Seconds(3))


    // Declare the Kafka parameters
    val group = "mykafka"
    val topic = "event_log"
    val brokers = "hadoop101:9092,hadoop102:9092,hadoop103:9092"
    val deserialization = "org.apache.kafka.common.serialization.StringDeserializer"
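    // Note: decoding in the 0-8 direct stream is done by the StringDecoder type parameters
    // passed to createDirectStream below; the deserializer entries built from this value
    // follow the newer consumer config names and are not what performs the decoding here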

    // Build the Kafka parameter map
    val kafkaPara: Map[String, String] = Map[String, String](
      ConsumerConfig.GROUP_ID_CONFIG -> group,
      ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG -> brokers,
      ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG -> deserialization,
      ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG -> deserialization

    )


    // Create a KafkaCluster client (used to read and commit consumer offsets)
    val kafkaCluster = new KafkaCluster(kafkaPara)

    val fromOffsets: Map[TopicAndPartition, Long] = getOffsetFromZookeeper(kafkaCluster, group, Set(topic))

    // Create a DStream that reads from Kafka, starting at the recovered offsets
    val kafkaDStream = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder, String](ssc
      , kafkaPara
      , fromOffsets
      , (x: MessageAndMetadata[String, String]) => x.message())
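
    // With the direct approach each RDD partition maps 1:1 to a Kafka partition, and the
    // messageHandler above keeps only the record value (key and metadata are dropped)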


    // Process the data with your own logic; for example, count the words read from Kafka
    val words: DStream[String] = kafkaDStream.flatMap(_.split(" "))
    val wordAndOne: DStream[(String, Int)] = words.map((_, 1))
    val reduceWord: DStream[(String, Int)] = wordAndOne.reduceByKey(_ + _)
    // Print the result
    reduceWord.print()

    // Commit the offsets back after each batch
    offsetToZookeeper(kafkaDStream, kafkaCluster, group)

    ssc.start()
    ssc.awaitTermination()

  }

  // Fetch the saved offsets from ZooKeeper for the given group and topics
  def getOffsetFromZookeeper(kafkaCluster: KafkaCluster, group: String, kafkaTopicSet: Set[String]): Map[TopicAndPartition, Long] = {

    // Map to hold the offset for each topic-partition
    val topicAndPartitionOffsetMap = new mutable.HashMap[TopicAndPartition, Long]()

    // Get all partitions of the given topics
    val topicAndPartition: Either[Err, Set[TopicAndPartition]] = kafkaCluster.getPartitions(kafkaTopicSet)

    // If the topic partitions were fetched successfully
    if (topicAndPartition.isRight) {
      val partitions: Set[TopicAndPartition] = topicAndPartition.right.get

      // Get the consumer offsets for those partitions
      val offsetInfo: Either[Err, Map[TopicAndPartition, Long]] = kafkaCluster.getConsumerOffsets(group, partitions)

      if (offsetInfo.isLeft) {

        // If no offsets are stored yet, start every partition from 0
        for (top <- partitions) {
          topicAndPartitionOffsetMap += (top -> 0L)
        }
      } else {

        // Otherwise use the stored offsets
        val offsets: Map[TopicAndPartition, Long] = offsetInfo.right.get

        for ((top, offset) <- offsets) {
          topicAndPartitionOffsetMap += (top -> offset)
        }
      }
    }

    topicAndPartitionOffsetMap.toMap

  }


  // Commit the processed offsets to ZooKeeper
  def offsetToZookeeper(kafkaDStream: InputDStream[String], kafkaCluster: KafkaCluster, group: String): Unit = {
    kafkaDStream.foreachRDD {
      rdd =>
        // Get the offset ranges carried by this batch's RDD
        val offsetsList: Array[OffsetRange] = rdd.asInstanceOf[HasOffsetRanges].offsetRanges

        // Walk through the offset ranges and update them in ZooKeeper

        for (offset <- offsetsList) {
          val topicAndPartition = TopicAndPartition(offset.topic, offset.partition)
          val ack: Either[Err, Map[TopicAndPartition, Short]] = kafkaCluster.setConsumerOffsets(group, Map((topicAndPartition, offset.untilOffset)))

          if (ack.isLeft) {
            println("Error updating offset to kafkaCluster: " + ack.left.get)
          } else {
            println("Successfully updated offset to kafkaCluster: " + offset.untilOffset)
          }


        }


    }


  }


}
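
For comparison, below is a minimal sketch (not part of the original example) of the same word count written without manual offset management, using the simpler createDirectStream overload from the same 0-8 integration. With no stored offsets it starts from "auto.offset.reset" (latest by default), so it does not get the ZooKeeper-based recovery shown above; the object name KafkaStreamingSimple is a placeholder.

package com.huiyi.deve

import kafka.serializer.StringDecoder
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka.KafkaUtils
import org.apache.spark.streaming.{Seconds, StreamingContext}

object KafkaStreamingSimple {

  def main(args: Array[String]): Unit = {

    val sparkConf = new SparkConf().setAppName("KafkaStreamingSimple").setMaster("local[*]")
    val ssc = new StreamingContext(sparkConf, Seconds(3))

    val kafkaPara: Map[String, String] = Map[String, String](
      "bootstrap.servers" -> "hadoop101:9092,hadoop102:9092,hadoop103:9092",
      "group.id" -> "mykafka"
    )

    // Each RDD partition maps 1:1 to a partition of the event_log topic;
    // records arrive as (key, value) pairs
    val kafkaDStream: InputDStream[(String, String)] =
      KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](ssc, kafkaPara, Set("event_log"))

    // Same word count as above, on the record values only
    kafkaDStream.map(_._2).flatMap(_.split(" ")).map((_, 1)).reduceByKey(_ + _).print()

    ssc.start()
    ssc.awaitTermination()

  }

}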

 
