kafka
实战啊,首先我们先清空下topic
删除topic
kafka-topics.sh --delete --zookeeper node1/kafka --topic ooxx
提示:
Topic ooxx is marked for deletion.
Note: This will have no impact if delete.topic.enable is not set to true.
此时的删除并不是真正的删除,而是把topic标记为删除:marked for deletion
但是我发现新版本里面zookeeper 也没有该topic,在查询list 的时候也不存在该topic
查看topic是否还存在
kafka-topics.sh --zookeeper node1/kafka --list
代码
我前面的博客没有打开外部的advertised.listeners
配置,记得在这个配置后面添加主机的外部ip地址
创建Producer
package com.example.kafka.lesson;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import org.apache.kafka.common.serialization.StringSerializer;
import org.junit.jupiter.api.Test;
import java.util.Properties;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
/**
* @author zjj
* @create 2022/3/8 15:51
* @Description:
*/
public class lesson01 {
    /**
     * Producer demo: sends 3 x 3 records to the "test-items" topic and prints
     * the partition/offset the broker assigned to each record.
     *
     * @throws ExecutionException   if the broker fails/rejects a send
     * @throws InterruptedException if interrupted while blocking on send().get()
     */
    @Test
    public void producer() throws ExecutionException, InterruptedException {
        String topic = "test-items";
        Properties properties = new Properties();
        properties.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "172.16.111.47:9092,172.16.111.45:9092");
        properties.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        properties.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        // FIX: use parameterized types instead of raw KafkaProducer/ProducerRecord,
        // and close the producer (try-with-resources) so buffered records are
        // flushed and network resources released before the test exits.
        try (KafkaProducer<String, String> kafkaProducer = new KafkaProducer<>(properties)) {
            for (int i = 0; i < 3; i++) {
                for (int j = 0; j < 3; j++) {
                    ProducerRecord<String, String> producerRecord =
                            new ProducerRecord<>(topic, "item" + j, "val" + i);
                    Future<RecordMetadata> send = kafkaProducer.send(producerRecord);
                    // Block until the broker acknowledges, so the metadata is available.
                    RecordMetadata rm = send.get();
                    int partition = rm.partition();
                    long offset = rm.offset();
                    System.out.println("key: " + producerRecord.key() + " val:" + producerRecord.value() +
                            " topic:" + topic + " partition:" + partition + " offset:" + offset);
                }
            }
        }
    }
}
consumer (自动提交)
//一个运行中的consumer,会自己维护自己的消费进度
//一旦开启自动提交,提交是异步进行的,有两类风险:
//1,还没到提交时间就挂了,没提交,重启一个consumer,参照offset的时候,会重复消费
//2,一个批次的数据还没写数据库成功,但是这个批次的offset被异步提交了,挂了,重启一个consumer,参照offset的时候,会丢失这批消息
kafka-consumer-groups.sh --bootstrap-server server1:19092 --list
kafka-consumer-groups.sh --bootstrap-server server1:19092 --describe --group OOXX
/**
 * Consumer demo (auto-commit): subscribes to "topic-test1" and prints every
 * record's key/value/partition/offset in an endless poll loop.
 *
 * Inspect the group's committed offsets with:
 *   kafka-consumer-groups.sh --bootstrap-server node02:9092 --list
 */
@Test
public void consumer(){
    String topic = "topic-test1";
    Properties p = new Properties();
    p.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "server1:9092,server2:9092");
    p.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
    p.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
    // Consumption details.
    p.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "OOXX");
    // Kafka is both an MQ and storage: on the very first start this group has
    // no committed offset, so auto.offset.reset decides where to begin:
    //   earliest - reset to the earliest available offset
    //   latest   - reset to the log end (default; only new records are seen)
    //   none     - throw if no previous offset exists for the group
    p.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Auto-commit runs asynchronously in the background, which risks both
    // duplicate consumption (crash before the commit fires) and lost work
    // (commit fires before the batch is fully persisted downstream).
    p.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true");
    // Commit interval defaults to 5 seconds:
    // p.setProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "15000");
    // Poll pulls data on demand; cap how many records come back per poll:
    // p.setProperty(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, "");
    KafkaConsumer<String, String> consumer = new KafkaConsumer<>(p);
    consumer.subscribe(Arrays.asList(topic));
    while (true) {
        // FIX: poll(Duration.ofMillis(0)) returns immediately and makes this
        // loop busy-spin a full CPU core; a small timeout lets the thread
        // block until records (0..n of them) arrive or the timeout elapses.
        ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(500));
        if (!records.isEmpty()) {
            System.out.println("-----------" + records.count() + "-------------");
            Iterator<ConsumerRecord<String, String>> iterator = records.iterator();
            while (iterator.hasNext()) {
                // One consumer may own several partitions, but within a group a
                // partition is consumed by at most one consumer.
                ConsumerRecord<String, String> next = iterator.next();
                // Partition this record belongs to.
                int partition = next.partition();
                // Offset of this record within the partition.
                long offset = next.offset();
                System.out.println("key: " + next.key() + " val: " + next.value() + " partition: " + partition + " offset: " + offset);
            }
        }
    }
}
消费数据
consumer 非自动提交(手动提交)
自己去维护offset 提交的粒度
/**
 * Consumer demo (manual commit, enable.auto.commit=false): offsets are
 * committed by hand at three granularities, from safest to coarsest:
 *   1. per record     - commitSync after each message
 *   2. per partition  - commitSync after each partition's slice of the poll
 *   3. per poll batch - commitAsync after the whole poll
 *
 * Inspect the group's committed offsets with:
 *   kafka-consumer-groups.sh --bootstrap-server node02:9092 --list
 */
@Test
public void consumer(){
    String topic = "topic-test1";
    Properties p = new Properties();
    p.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "server1:9092,server2:9092");
    p.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
    p.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
    // Consumption details.
    p.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "OOXX");
    // Kafka is both an MQ and storage: on the very first start this group has
    // no committed offset, so auto.offset.reset decides where to begin:
    //   earliest - reset to the earliest available offset
    //   latest   - reset to the log end (default; only new records are seen)
    //   none     - throw if no previous offset exists for the group
    p.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Auto-commit disabled: this demo owns the commit timing itself, avoiding
    // the duplicate/lost-message risks of background async auto-commit.
    p.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
    // Auto-commit interval (unused here, kept for reference):
    // p.setProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "15000");
    // Cap how many records come back per poll:
    // p.setProperty(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, "");
    KafkaConsumer<String, String> consumer = new KafkaConsumer<>(p);
    consumer.subscribe(Arrays.asList(topic), new ConsumerRebalanceListener() {
        @Override
        public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
            System.out.println("---onPartitionsRevoked:");
            Iterator<TopicPartition> iter = partitions.iterator();
            while (iter.hasNext()) {
                System.out.println("---- " + iter.next().partition());
            }
        }
        @Override
        public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
            System.out.println("---onPartitionsAssigned:");
            Iterator<TopicPartition> iter = partitions.iterator();
            while (iter.hasNext()) {
                System.out.println("---- " + iter.next().partition());
            }
        }
    });
    while (true) {
        // FIX: poll(0) busy-spins a full CPU core; block briefly instead.
        ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(500));
        if (!records.isEmpty()) {
            System.out.println("-----------" + records.count() + "-------------");
            // Each poll may return data for several partitions.
            Set<TopicPartition> partitions = records.partitions();
            /*
             * Manual-commit granularities demonstrated below:
             *   1. per record    - sync commit after every message
             *   2. per partition - sync commit after each partition's batch
             *   3. per poll      - async commit for the whole batch
             * With multiple threads, 1 and 3 stay single-threaded; 2 is the
             * natural candidate for one worker per partition.
             */
            for (TopicPartition partition : partitions) {
                List<ConsumerRecord<String, String>> records1 = records.records(partition);
                // Within one micro-batch, process partition by partition;
                // this could also run in parallel with one thread per partition.
                Iterator<ConsumerRecord<String, String>> piter = records1.iterator();
                while (piter.hasNext()) {
                    ConsumerRecord<String, String> next = piter.next();
                    int par = next.partition();
                    long offset = next.offset();
                    String key = next.key();
                    String value = next.value();
                    long timestamp = next.timestamp();
                    System.out.println("key: " + key + " val: " + value + " partition: " + par + " offset: " + offset + "time:: " + timestamp);
                    TopicPartition sp = new TopicPartition(topic, par);
                    // FIX: the committed offset must be the offset of the NEXT
                    // record to consume (lastProcessed + 1); committing the
                    // record's own offset makes a restarted consumer
                    // re-process the last committed message.
                    OffsetAndMetadata om = new OffsetAndMetadata(offset + 1);
                    HashMap<TopicPartition, OffsetAndMetadata> map = new HashMap<>();
                    map.put(sp, om);
                    // Strategy 1: record-level sync commit — the safest;
                    // works single- or multi-threaded.
                    consumer.commitSync(map);
                }
                // Offset of the last record in this partition's batch.
                long poff = records1.get(records1.size() - 1).offset();
                // FIX: same +1 rule as above — commit the NEXT offset.
                OffsetAndMetadata pom = new OffsetAndMetadata(poff + 1);
                HashMap<TopicPartition, OffsetAndMetadata> map = new HashMap<>();
                map.put(partition, pom);
                // Strategy 2: partition-level sync commit. Kafka does not care
                // how much you processed — only what offset you report back —
                // so the whole partition batch can be processed first, as long
                // as the final offset reported is correct.
                consumer.commitSync(map);
            }
            // Strategy 3: commit the whole poll batch asynchronously.
            consumer.commitAsync();
        }
    }
    // NOTE(review): removed a large commented-out duplicate of the
    // auto-commit poll loop that previously sat here — dead code.
}