import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;

import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.serializer.StringDecoder;
import kafka.utils.VerifiableProperties;

public class KafkaConsumer implements Runnable {
    private static final Logger LOGGER = LoggerFactory.getLogger(KafkaConsumer.class);
    /**
     * Kafka consumer connector
     */
    private ConsumerConnector consumer;
    /**
     * Kafka topic
     */
    private String topic;
    /**
     * Number of threads, usually equal to the topic's partition count
     */
    private int numThreads;
    /**
     * Thread pool
     */
    private ExecutorService executorPool;

    /**
     * Constructor
     * @param topic      Kafka topic to consume
     * @param numThreads number of worker threads; usually the topic's partition count
     * @param zookeeper  ZooKeeper connection string for the Kafka cluster
     * @param groupId    consumer group ID this consumer belongs to
     */
    public KafkaConsumer(String topic, int numThreads, String zookeeper, String groupId) {
        // 1. Create the Kafka consumer connector
        this.consumer = Consumer.createJavaConsumerConnector(createConsumerConfig(zookeeper, groupId));
        // 2. Assign the fields
        this.topic = topic;
        this.numThreads = numThreads;
    }
    public void run() {
        // 1. Specify the topic and how many streams (threads) to open for it
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put(this.topic, this.numThreads);
        // 2. Specify the key/value decoders
        StringDecoder keyDecoder = new StringDecoder(new VerifiableProperties());
        StringDecoder valueDecoder = new StringDecoder(new VerifiableProperties());
        // 3. Get the message stream iterators from the connector
        /**
         * Key:   topic name
         * Value: list of streams for that topic; its size is the count given in topicCountMap
         */
        Map<String, List<KafkaStream<String, String>>> consumerMap = this.consumer.createMessageStreams(topicCountMap, keyDecoder, valueDecoder);
        // 4. Pull out the streams for our topic
        List<KafkaStream<String, String>> streams = consumerMap.get(this.topic);
        // 5. Create the thread pool
        this.executorPool = Executors.newFixedThreadPool(this.numThreads);
        // 6. Submit one stream processor per stream
        int threadNumber = 0;
        for (final KafkaStream<String, String> stream : streams) {
            this.executorPool.submit(new ConsumerKafkaStreamProcesser(stream, threadNumber));
            threadNumber++;
        }
    }
    public void shutDown() {
        // 1. Close the Kafka connection; this makes stream.hasNext() return false
        if (this.consumer != null) {
            this.consumer.shutdown();
        }
        // 2. Shut down the thread pool and wait for the workers to finish
        if (this.executorPool != null) {
            // 2.1 Stop accepting new tasks
            this.executorPool.shutdown();
            // 2.2 Wait up to five seconds for termination
            try {
                if (!this.executorPool.awaitTermination(5, TimeUnit.SECONDS)) {
                    System.out.println("Timed out waiting for consumer threads to shut down, exiting uncleanly!!");
                }
            } catch (InterruptedException e) {
                // Restore the interrupt flag so callers can see the interruption
                Thread.currentThread().interrupt();
                System.out.println("Interrupted during shutdown, exiting uncleanly!!");
            }
        }
    }
    /**
     * Builds the ConsumerConfig from the given ZooKeeper connection string and group ID.
     * @param zookeeper ZooKeeper connection string, e.g.<br/> hadoop-senior01.ibeifeng.com:2181,hadoop-senior02.ibeifeng.com:2181/kafka
     * @param groupId   consumer group ID; consumers sharing the same value balance the partitions among themselves
     * @return the Kafka consumer configuration
     */
    private ConsumerConfig createConsumerConfig(String zookeeper, String groupId) {
        // 1. Build the properties object
        Properties prop = new Properties();
        // 2. Set the relevant properties
        prop.put("group.id", groupId); // consumer group ID
        prop.put("zookeeper.connect", zookeeper); // ZooKeeper connection URL
        prop.put("zookeeper.session.timeout.ms", "400"); // ZooKeeper session timeout
        prop.put("zookeeper.sync.time.ms", "200");
        prop.put("auto.commit.interval.ms", "1000");
        /* Alternative: read the values from a properties file via a factory class:
        prop.put("group.id", ConsumerPropertiesFactory.groupId); // consumer group ID
        prop.put("zookeeper.connect", ConsumerPropertiesFactory.zookeeperConnect); // ZooKeeper connection URL
        prop.put("metadata.broker.list", ConsumerPropertiesFactory.metadataBrokerList);
        prop.put("auto.offset.reset", ConsumerPropertiesFactory.autoOffsetReset);
        prop.put("zookeeper.session.timeout.ms", "400");
        prop.put("zookeeper.sync.time.ms", "200");
        prop.put("auto.commit.interval.ms", "1000"); */
        // 3. Build the ConsumerConfig
        return new ConsumerConfig(prop);
    }
}
The code above pulls data from Kafka with one worker thread per stream. The key point is that the thread count should normally match the topic's partition count: within a consumer group, each partition is consumed by at most one thread at a time, so any threads beyond the partition count simply sit idle. On the producer side, the client batches messages and switches to another partition once a batch reaches a certain size or a time limit expires, which is what spreads data across the partitions (and hence across the consumer threads).
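To make that producer-side behavior concrete, here is a minimal sketch, assuming the newer org.apache.kafka.clients producer rather than the old Scala client used elsewhere in this post; batch.size and linger.ms are exactly the "size or timeout" knobs mentioned above, and the broker address and topic name are placeholders:

import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class BatchingProducerSketch {
    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "192.168.18.128:9092"); // placeholder broker address
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("batch.size", "16384"); // a batch is sent once it reaches this many bytes...
        props.put("linger.ms", "5");      // ...or once it has waited this long
        KafkaProducer<String, String> producer = new KafkaProducer<String, String>(props);
        for (int i = 0; i < 100; i++) {
            // With no record key, the client picks the partition itself
            producer.send(new ProducerRecord<String, String>("testTopic4", "message-" + i));
        }
        producer.close();
    }
}

Depending on the client version, keyless records are spread across partitions either record by record or batch by batch, but in both cases every partition (and therefore every consumer thread) receives data.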
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.message.MessageAndMetadata;

public class ConsumerKafkaStreamProcesser implements Runnable {
    private static final Logger LOGGER = LoggerFactory.getLogger(ConsumerKafkaStreamProcesser.class);
    // Kafka message stream
    private KafkaStream<String, String> stream;
    // Worker thread number
    private int threadNumber;

    /**
     * Constructor
     */
    public ConsumerKafkaStreamProcesser(KafkaStream<String, String> stream, int threadNumber) {
        this.stream = stream;
        this.threadNumber = threadNumber;
    }
    public void run() {
        // 1. Get the message iterator
        ConsumerIterator<String, String> iter = this.stream.iterator();
        // 2. Iterate over the messages and print them
        while (iter.hasNext()) {
            // 2.1 Fetch the next message and its metadata
            MessageAndMetadata<String, String> value = iter.next();
            // 2.2 Print thread number, offset, key, message and partition
            LOGGER.info(this.threadNumber + ":" + value.offset() + ":" + value.key() + ":" + value.message());
            System.out.println(this.threadNumber + ":" + value.offset() + ":" + value.key() + ":" + value.message() + " partition..." + value.partition());
        }
        // 3. The stream is exhausted, i.e. the connector was shut down
        LOGGER.info("Shutdown Thread:" + this.threadNumber);
        System.out.println("Shutdown Thread:" + this.threadNumber);
    }
}
This class handles the output side for the topic; each instance runs on its own worker thread, and the printed log line (thread number, offset, key, message, partition) documents exactly what was consumed.
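The processor above relies on auto.commit.interval.ms=1000, so offsets are committed in the background. As a hedged variant (not in the original post, and assuming auto.commit.enable were set to "false" and the ConsumerConnector were handed to the worker), offsets can instead be committed manually once each message is fully processed:

import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;

public class ManualCommitStreamProcesser implements Runnable {
    private final KafkaStream<String, String> stream;
    private final ConsumerConnector connector; // assumption: the connector is passed in

    public ManualCommitStreamProcesser(KafkaStream<String, String> stream, ConsumerConnector connector) {
        this.stream = stream;
        this.connector = connector;
    }

    public void run() {
        ConsumerIterator<String, String> iter = this.stream.iterator();
        while (iter.hasNext()) {
            MessageAndMetadata<String, String> value = iter.next();
            System.out.println(value.partition() + "/" + value.offset() + ": " + value.message());
            // commitOffsets() commits the current position of every stream owned by
            // this connector, so only call it once the message is fully handled
            this.connector.commitOffsets();
        }
    }
}

Committing after every single message is safe but slow; committing every N messages is the usual compromise.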
public class App {
    public static void main(String[] args) {
        String zookeeper = "192.168.18.128:2181";
        String groupId = "group1";
        String topic1 = "testTopic4";
        String topic2 = "testTopic3";
        int threads = 10;
        KafkaConsumer instance = new KafkaConsumer(topic1, threads, zookeeper, groupId);
        KafkaConsumer instance1 = new KafkaConsumer(topic2, threads, zookeeper, groupId);
        new Thread(instance).start();
        new Thread(instance1).start();
        // int sleepMillis = 30000;
        /* try {
            Thread.sleep(sleepMillis);
        } catch (InterruptedException e) {
            e.printStackTrace();
        } */
    }
}
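One gap worth pointing out: App never calls shutDown(), so the connectors and thread pools only disappear when the process is killed. A minimal sketch of a shutdown hook that could be appended at the end of main, assuming instance and instance1 are declared final (this is my addition, not part of the original test class):

        // Register a JVM shutdown hook so both consumers close their ZooKeeper
        // sessions and worker pools cleanly on Ctrl+C / SIGTERM:
        Runtime.getRuntime().addShutdownHook(new Thread(new Runnable() {
            public void run() {
                instance.shutDown();
                instance1.shutDown();
            }
        }));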
Finally, a brief note on the test class. In real production code the Kafka connection settings are not hard-coded like this; they are kept in a properties file under resources, and the same goes for the database connection settings used when the consumed data is persisted. Loading them once into static fields is fast (see the sketch below).
Also, if you do not actually need concurrency, skip the multi-threaded approach altogether: thread context switches have their own cost.
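As one way to implement that pattern, here is a sketch of the ConsumerPropertiesFactory referenced in the commented-out block earlier; the field names come from that block, while the kafka-consumer.properties file name is a hypothetical choice:

import java.io.IOException;
import java.io.InputStream;
import java.util.Properties;

public final class ConsumerPropertiesFactory {
    public static final String zookeeperConnect;
    public static final String groupId;
    public static final String metadataBrokerList;
    public static final String autoOffsetReset;

    // Static initializer: runs once when the class is loaded, so every consumer
    // shares the same values without re-reading the file
    static {
        Properties p = new Properties();
        try (InputStream in = ConsumerPropertiesFactory.class
                .getResourceAsStream("/kafka-consumer.properties")) { // hypothetical file name
            if (in == null) {
                throw new IOException("kafka-consumer.properties not found on classpath");
            }
            p.load(in);
        } catch (IOException e) {
            throw new ExceptionInInitializerError(e);
        }
        zookeeperConnect = p.getProperty("zookeeper.connect");
        groupId = p.getProperty("group.id");
        metadataBrokerList = p.getProperty("metadata.broker.list");
        autoOffsetReset = p.getProperty("auto.offset.reset", "largest");
    }

    private ConsumerPropertiesFactory() {
    }
}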