Consuming multiple Kafka topics with the direct approach

A consumer group can consume multiple topics. I previously wrote a post where one consumer consumed a single topic; this time a consumer group consumes several topics through the direct approach. I ran a small test and the results were correct: checking with the ZooKeeper client shows that ZooKeeper recorded the offsets.

package day04

/*
Consume multiple topics
 */
import kafka.common.TopicAndPartition
import kafka.message.MessageAndMetadata
import kafka.serializer.StringDecoder
import kafka.utils.{ZKGroupTopicDirs, ZkUtils}
import scala.collection.mutable.ListBuffer
import org.I0Itec.zkclient.ZkClient
import org.apache.spark.SparkConf
import org.apache.spark.streaming.dstream.InputDStream
import org.apache.spark.streaming.kafka.{HasOffsetRanges, KafkaUtils, OffsetRange}
import org.apache.spark.streaming.{Duration, StreamingContext}

object OrderDemoYY1 {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf().setAppName("yy").setMaster("local[*]")
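    //5-second batch interval (Duration takes milliseconds)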
    val ssc = new StreamingContext(conf,Duration(5000))
    //the three topics to consume
    val topic1 = "wc"
    val topic2 = "wc1"
    val topic3 = "wc2"
    //consumer group name
    val groupid = "GPMMVV"
    //ZooKeeper quorum
    val zkQuorum = "hadoop01:2181,hadoop02:2181,hadoop03:2181"
    //broker list
    val brokerList = "hadoop01:9092,hadoop02:9092,hadoop03:9092"
    //put the topics into a Set; it is passed to createDirectStream on the first run, when no offsets have been saved yet
    val topics = Set(topic1, topic2, topic3)
    //ListBuffer keeps the topics ordered by index, so a topic's position matches its ZKGroupTopicDirs below
    val topicsList = ListBuffer[String](topic1, topic2, topic3)
    //Kafka parameters
    val kafkaParams = Map(
      "metadata.broker.list" -> brokerList,
      "group.id" -> groupid,
      "auto.offset.reset" -> kafka.api.OffsetRequest.SmallestTimeString
      //"smallest" makes the first read start from the beginning of each topic
    )

    //a ListBuffer of ZKGroupTopicDirs, one per topic; each describes where that topic's offsets live in ZooKeeper
    val zkGTList = new ListBuffer[ZKGroupTopicDirs]()
    //build a ZKGroupTopicDirs for each topic and add it to zkGTList
    for (tp <- topicsList) {
      val topicDirs = new ZKGroupTopicDirs(groupid, tp)
      zkGTList += topicDirs
    }
    //create a ZkClient for reading and updating the offsets
    val zkClient = new ZkClient(zkQuorum)
    //the InputDStream must be a var: it is assigned in one of two branches below, depending on whether offsets were saved before
    var kafkaDStream: InputDStream[(String, String)] = null
    //a Map of TopicAndPartition -> offset, filled from ZooKeeper when saved offsets exist
    var fromOffset = Map[TopicAndPartition, Long]()

    //check whether each topic has been consumed before
    val childrens = new ListBuffer[Int]()
    var flag = false  //true if any topic already has saved offsets
    for (topicDir <- zkGTList) { //loop over the offset directories
      //zkClient.countChildren returns the number of partition nodes under the topic's offset directory
      val child: Int = zkClient.countChildren(topicDir.consumerOffsetDir)
      childrens += child
      if (child > 0) {
        flag = true
      }
    }


    if (flag) { //offsets exist: resume from the saved offsets
      for (z <- 0 until topicsList.size) { //use the index into topicsList to look up the matching child count and ZKGroupTopicDirs
        val child = childrens(z)
        val gpDirs = zkGTList(z)
        val topicn = topicsList(z)
        for (i <- 0 until child) {
          //read each partition's saved offset with zkClient.readData
          val offset = zkClient.readData[String](gpDirs.consumerOffsetDir + "/" + i)
          val tp = new TopicAndPartition(topicn, i)
          fromOffset += tp -> offset.toLong
        }
      }
      //messageHandler returns (key, value): the key is null by default, the value is the Kafka message body
      val messageHandler = (mmd: MessageAndMetadata[String, String]) => {
        (mmd.key(), mmd.message())
      }
      //create the DStream starting from the saved offsets
      kafkaDStream = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder, (String, String)](
        ssc, kafkaParams, fromOffset, messageHandler
      )
    } else { //nothing has been consumed yet: subscribe by topic set and start from auto.offset.reset
      kafkaDStream = KafkaUtils.createDirectStream[String, String, StringDecoder, StringDecoder](
        ssc, kafkaParams, topics
      )
    }
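    //the earlier hard-coded two-topic version is kept (commented out) below for reference; the loop above generalizes it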

    /*val children1 = zkClient.countChildren(zKGroupTopicDirs1.consumerOffsetDir)
    val children2 = zkClient.countChildren(zKGroupTopicDirs2.consumerOffsetDir)
    if(children1>0 || children2>0){
      if(children1>0){
         for (i <- 0 until children1){
           val offset = zkClient.readData[String](zKGroupTopicDirs1.consumerOffsetDir+"/"+i)
           val tp  = new TopicAndPartition(topic1,i)
           fromOffset += tp ->offset.toLong
         }
      }
      if(children2>0){
        for (i <- 0 until children1){
          val offset = zkClient.readData[String](zKGroupTopicDirs2.consumerOffsetDir+"/"+i)
          val tp  = new TopicAndPartition(topic2,i)
          fromOffset += tp ->offset.toLong
        }
      }
      val messageHandler =(mmd:MessageAndMetadata[String,String])=>{
        (mmd.key(),mmd.message())
      }
      kafkaDStream = KafkaUtils.createDirectStream[String,String,StringDecoder,StringDecoder,(String,String)](ssc,
        kafkaParams,fromOffset,messageHandler)
    }else{
      kafkaDStream = KafkaUtils.createDirectStream[String,String,StringDecoder,StringDecoder](ssc,kafkaParams,topics)
    }*/


    var offsetRanges = Array[OffsetRange]()  //holds each batch's per-partition offset ranges

    kafkaDStream.foreachRDD(kafkaRDD => {
      //the RDD produced by the direct stream is a KafkaRDD; casting it to HasOffsetRanges exposes the offset ranges
      offsetRanges = kafkaRDD.asInstanceOf[HasOffsetRanges].offsetRanges
      kafkaRDD.foreach(println)  //print each (key, value) record

      for (o <- offsetRanges) {
        val topicNN: String = o.topic //topic name
        val offset: Long = o.untilOffset //offset after this batch
        val partition: Int = o.partition  //partition number
        val i = topicsList.indexOf(topicNN) //find the topic's index in topicsList to locate the matching ZKGroupTopicDirs
        val gpDir = zkGTList(i)
        //write the new offset back to ZooKeeper with ZkUtils
        ZkUtils.updatePersistentPath(zkClient, gpDir.consumerOffsetDir + "/" + partition, offset.toString)
        /*if(topicNN.equals(topic1)){
          ZkUtils.updatePersistentPath(zkClient,zKGroupTopicDirs1.consumerOffsetDir+"/"+partition,offset.toString)
        }else if(topicNN.equals(topic2)){
          ZkUtils.updatePersistentPath(zkClient,zKGroupTopicDirs2.consumerOffsetDir+"/"+partition,offset.toString)
        }*/
      }
    })

    ssc.start()
    ssc.awaitTermination()

  }

}

You can check the offsets with the ZooKeeper client under /consumers. Of my three topics, wc and wc1 each have a single partition: partition 0 of wc1 shows an offset of 13, and partition 0 of wc shows an offset of 7. wc2 has three partitions; partition 1 shows an offset of 7. (Screenshots of the ZooKeeper client output omitted.)
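If you would rather check from code than from the ZooKeeper shell, a minimal sketch along the following lines reads back the same offsets. It reuses the ZooKeeper quorum, group name and topic names from the job above; the CheckOffsets object itself is just an illustration, not part of the streaming job.

import kafka.utils.ZKGroupTopicDirs
import org.I0Itec.zkclient.ZkClient

object CheckOffsets {
  def main(args: Array[String]): Unit = {
    //same ZooKeeper quorum and group as the streaming job above (adjust to your cluster)
    val zkClient = new ZkClient("hadoop01:2181,hadoop02:2181,hadoop03:2181")
    val groupid = "GPMMVV"
    for (topic <- Seq("wc", "wc1", "wc2")) {
      //consumerOffsetDir is /consumers/<group>/offsets/<topic>
      val dirs = new ZKGroupTopicDirs(groupid, topic)
      val partitions = zkClient.countChildren(dirs.consumerOffsetDir)
      for (p <- 0 until partitions) {
        //each child node holds one partition's last committed offset as a string
        val offset = zkClient.readData[String](dirs.consumerOffsetDir + "/" + p)
        println(s"$topic partition $p -> offset $offset")
      }
    }
    zkClient.close()
  }
}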
