Kafka: Low-Level API vs. High-Level API vs. New API | Choosing auto.offset.reset


Introduction
This article covers the differences between Kafka's low-level API, high-level API, and new API, and how to choose the auto.offset.reset parameter.
The diagram at the top of the original post is included so readers can clearly see the relationship between Broker, Partition, and replica.


Differences Between the Low-Level API, High-Level API, and New API

Characteristics of the Low-Level API

Advantages
● The developer controls the offset and can start reading from wherever they like.
● The developer controls which partitions to connect to and can do custom load balancing across partitions.
● Reduced dependence on ZooKeeper (for example, offsets do not have to be stored in ZooKeeper; you can store them yourself, say in a file or in memory).

Disadvantages
● It is quite complex: you have to manage offsets yourself, decide which partition to connect to, find the partition leader, and so on.

package com.csdn.kafka.consumer;

import kafka.api.FetchRequestBuilder;
import kafka.api.PartitionOffsetRequestInfo;
import kafka.common.TopicAndPartition;
import kafka.javaapi.*;
import kafka.javaapi.consumer.SimpleConsumer;
import kafka.javaapi.message.ByteBufferMessageSet;
import kafka.message.Message;
import kafka.message.MessageAndOffset;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooKeeper;
import util.ZkUtil;

import java.nio.ByteBuffer;
import java.util.*;

/**
 * Created by ag on 2020/5/9.
 */
public class SimpleConsumerAPI {


    public static void main(String[] args)throws Exception {

        String zkString = "192.168.1.115:2181,192.168.1.116:2181,192.168.1.117:2181";
        String broker = "192.168.1.115";

        int port = 9092;
        int buffersize = 64*1024;
        String clientId = "clientId";
        String topic = "test";

        // Earliest available offset; pass this to getReadOffset(...) to start from the beginning
        // instead of the offset stored in ZooKeeper.
        long whichTime = kafka.api.OffsetRequest.EarliestTime();

        int timeout = 6000;
        ZooKeeper zk = new ZooKeeper(zkString, timeout, new Watcher() {
            public void process(WatchedEvent watchedEvent) {
                System.out.println(watchedEvent);
            }
        });

        // List this topic's partition ids from ZooKeeper (children of /brokers/topics/test/partitions).
        List<String> partitions = zk.getChildren("/brokers/topics/test/partitions", true);
        System.out.println(partitions);

        for (String p : partitions) {
            int partition = Integer.valueOf(p);
            // Find the current leader broker for this partition via topic metadata.
            String leader = getLeader(timeout, broker, port, partition,
                    buffersize, clientId, topic);

            // Read the last committed offset for this partition from our own ZooKeeper path.
            byte[] data = ZkUtil.getData("/consumers/test/testgroup/" + partition);
            long readOffset = Long.valueOf(new String(data).trim());
            System.out.println(readOffset);

            // One reader thread per partition; each thread stops once offset 14110 is passed.
            new Thread(new ReadDataTask(timeout, port, partition, buffersize,
                    clientId, topic, leader, readOffset, 14110)).start();

        }

    }

    /** One-shot fetch from a single partition starting at readOffset (not called from main; kept for reference). */
    private static void fetchData(int timeout, int port, int partition, int buffersize,
                                  String clientId, String topic, String leader, long readOffset) {
        SimpleConsumer simpleConsumer = new SimpleConsumer(leader, port, timeout, buffersize, clientId);
        kafka.api.FetchRequest request = new FetchRequestBuilder()
                .addFetch(topic,partition,readOffset,100000)
                .clientId(clientId)
                .build();
        FetchResponse fetch = simpleConsumer.fetch(request);
        ByteBufferMessageSet messageAndOffsets = fetch.messageSet(topic, partition);
        Iterator<MessageAndOffset> iterator = messageAndOffsets.iterator();
        while(iterator.hasNext()){
            MessageAndOffset next = iterator.next();
            long offset = next.offset();
            long nextoffset = next.nextOffset();
            Message message = next.message();
            ByteBuffer payload = message.payload();
            byte[] bytes = new byte[payload.limit()];
            payload.get(bytes);
            System.out.println(partition
                    +"\t"+new String (bytes)+"\t"+offset+"\t"+nextoffset);
        }
        simpleConsumer.close();
    }


    /** Asks the partition leader for the earliest or latest offset (depending on whichTime); also not called from main. */
    private static long getReadOffset(long whichTime, int timeout, int port,
                                      int partition, int buffersize, String clientId, String topic, String leader) {
        SimpleConsumer offsetConsumer = new SimpleConsumer(leader, port, timeout, buffersize, clientId);
        Map<TopicAndPartition, PartitionOffsetRequestInfo> requestInfo
                = new HashMap<TopicAndPartition, PartitionOffsetRequestInfo>();

        TopicAndPartition topicAndPartition = new TopicAndPartition(topic,partition);
        PartitionOffsetRequestInfo partitionOffsetRequestInfo = new PartitionOffsetRequestInfo(whichTime,1);

        requestInfo.put(topicAndPartition,partitionOffsetRequestInfo);

        OffsetRequest offsetRequest =
                new OffsetRequest(requestInfo,kafka.api.OffsetRequest.CurrentVersion(),clientId);
        OffsetResponse offsetsBefore = offsetConsumer.getOffsetsBefore(offsetRequest);
        long[] offsets = offsetsBefore.offsets(topic, partition);
        offsetConsumer.close();
        return offsets[0];
    }

    /** Queries topic metadata from a known broker to find the leader host for the given partition. */
    private static String getLeader(int timeout, String broker, int port,
                                    int partition, int buffersize, String clientId, String topic) {
        String leader = "";
        SimpleConsumer leaderConsumer = new SimpleConsumer(broker, port, timeout, buffersize, clientId);
        TopicMetadataRequest topicMetadataRequest = new TopicMetadataRequest(Collections.singletonList(topic));
        TopicMetadataResponse topicMetadataResponse = leaderConsumer.send(topicMetadataRequest);
        List<TopicMetadata> topicMetadatas = topicMetadataResponse.topicsMetadata();
        for(TopicMetadata topicMetadata:topicMetadatas){
            List<PartitionMetadata> partitionMetadatas = topicMetadata.partitionsMetadata();
            for(PartitionMetadata partitionMetadata:partitionMetadatas){
                if(partitionMetadata.partitionId() == partition){
                    leader =  partitionMetadata.leader().host();
                }
            }
        }
        leaderConsumer.close();
        return leader;
    }
}


class ReadDataTask implements Runnable{

    int timeout;
    int port;
    int partition;
    int buffersize;
    String clientId;
    String topic;
    String leader;
    long readOffset;
    long stopOffset;

    public ReadDataTask(int timeout, int port, int partition,
                        int buffersize, String clientId,
                        String topic, String leader, long readOffset,long stopOffset) {
        this.timeout = timeout;
        this.port = port;
        this.partition = partition;
        this.buffersize = buffersize;
        this.clientId = clientId;
        this.topic = topic;
        this.leader = leader;
        this.readOffset = readOffset;
        this.stopOffset= stopOffset;
    }

    public void run() {
        boolean flag =  true;
        int count = 0;
        // Create one SimpleConsumer for this partition's leader and reuse it across fetch rounds.
        SimpleConsumer simpleConsumer = new SimpleConsumer(leader, port, timeout, buffersize, clientId);
        while (flag) {
            kafka.api.FetchRequest request = new FetchRequestBuilder()
                    .addFetch(topic,partition,readOffset,100000)
                    .clientId(clientId)
                    .build();
            FetchResponse fetch = simpleConsumer.fetch(request);
            ByteBufferMessageSet messageAndOffsets = fetch.messageSet(topic, partition);
            Iterator<MessageAndOffset> iterator = messageAndOffsets.iterator();
            while(iterator.hasNext()){
                count ++;
                MessageAndOffset next = iterator.next();
                long offset = next.offset();
                if(offset>stopOffset){
                    flag = false;
                    break;
                }
                long nextoffset = next.nextOffset();
                readOffset = nextoffset;
                Message message = next.message();
                ByteBuffer payload = message.payload();
                byte[] bytes = new byte[payload.limit()];
                payload.get(bytes);
                System.out.println(Thread.currentThread().getName()+"\t"+partition
                        +"\t"+new String (bytes)+"\t"+offset+"\t"+nextoffset);
            }

            // Persist the next offset to read back to ZooKeeper so a restart resumes from here.
            try {
                ZkUtil.setData("/consumers/test/testgroup/" + partition, readOffset + "");
            } catch (Exception e) {
                e.printStackTrace();
            }


            if(count ==0){
                try {
                    Thread.sleep(1000);
                    System.out.println(Thread.currentThread().getName()+":sleep 1000ms");
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }

            count = 0;
        }
        simpleConsumer.close();
    }
}
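The listing above depends on a util.ZkUtil helper that the article does not show. A minimal sketch of such a helper, assuming the same ZooKeeper ensemble and that the offset znodes /consumers/test/testgroup/&lt;partition&gt; already exist, might look like this (the implementation details are assumptions, not the author's original code):

package util;

import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooKeeper;

import java.util.concurrent.CountDownLatch;

// Hypothetical helper assumed by SimpleConsumerAPI: reads and writes the
// per-partition offset znodes under /consumers/test/testgroup/<partition>.
public class ZkUtil {

    private static final String ZK =
            "192.168.1.115:2181,192.168.1.116:2181,192.168.1.117:2181";

    // Opens a session and blocks until it is actually connected.
    private static ZooKeeper connect() throws Exception {
        final CountDownLatch connected = new CountDownLatch(1);
        ZooKeeper zk = new ZooKeeper(ZK, 6000, new Watcher() {
            public void process(WatchedEvent event) {
                if (event.getState() == Event.KeeperState.SyncConnected) {
                    connected.countDown();
                }
            }
        });
        connected.await();
        return zk;
    }

    // Returns the raw bytes stored at the znode (here: an offset rendered as a string).
    public static byte[] getData(String path) throws Exception {
        ZooKeeper zk = connect();
        try {
            return zk.getData(path, false, null);
        } finally {
            zk.close();
        }
    }

    // Overwrites the znode with the new offset (version -1 = any version).
    public static void setData(String path, String value) throws Exception {
        ZooKeeper zk = connect();
        try {
            zk.setData(path, value.getBytes(), -1);
        } finally {
            zk.close();
        }
    }
}

Opening a new session per call keeps the sketch short; a production helper would reuse one ZooKeeper connection.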


 

Relationship Between Threads and Partitions
3 partitions, 2 threads → one thread consumes two partitions, the other consumes one (once a thread starts consuming a partition, it does not switch mid-way)
3 partitions, 3 threads → each thread consumes one partition
3 partitions, 5 threads → all 5 threads start, but only 3 of them do any work (see the sketch below)
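As a minimal sketch of that last case (it reuses the high-level consumer API shown in the HighLevelConsumer listing below; the class name ThreadPartitionDemo, the group id thread-demo, and the assumption of a 3-partition topic named test are mine), requesting 5 streams simply leaves 2 of the resulting threads idle:

package com.csdn.kafka.consumer;

import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

public class ThreadPartitionDemo {

    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("zookeeper.connect", "192.168.1.115:2181,192.168.1.116:2181,192.168.1.117:2181");
        props.put("group.id", "thread-demo");

        ConsumerConnector connector =
                Consumer.createJavaConsumerConnector(new ConsumerConfig(props));

        // Ask for 5 streams on a 3-partition topic: Kafka hands back 5 KafkaStream
        // objects, but only 3 of them are bound to partitions and will ever get data.
        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        topicCountMap.put("test", 5);
        Map<String, List<KafkaStream<byte[], byte[]>>> streams =
                connector.createMessageStreams(topicCountMap);

        for (final KafkaStream<byte[], byte[]> stream : streams.get("test")) {
            new Thread(new Runnable() {
                public void run() {
                    // Streams not bound to a partition block here and never print anything.
                    ConsumerIterator<byte[], byte[]> it = stream.iterator();
                    while (it.hasNext()) {
                        MessageAndMetadata<byte[], byte[]> m = it.next();
                        System.out.println(Thread.currentThread().getName()
                                + "\t" + m.partition() + "\t" + new String(m.message()));
                    }
                }
            }).start();
        }
    }
}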

Characteristics of the High-Level API

Advantages
● The high-level API is simple to write.
● No need to manage offsets yourself; the system manages them through ZooKeeper.
● No need to manage partitions, replicas, and so on; the system handles them automatically.
● If a consumer goes offline, it automatically resumes from the offset last recorded in ZooKeeper (by default the offset stored in ZooKeeper is refreshed every 5 s); this applies to version 0.10.2.
● Consumer groups let you keep different programs that read the same topic separate (each group records its own offset, so programs reading the same topic do not interfere with each other's offsets).

Disadvantages
● You cannot control the offset yourself (a problem for certain special requirements).
● You cannot fine-tune details such as partitions, replicas, or ZooKeeper usage.

package com.csdn.kafka.consumer;


import kafka.consumer.Consumer;
import kafka.consumer.ConsumerConfig;
import kafka.consumer.ConsumerIterator;
import kafka.consumer.KafkaStream;
import kafka.javaapi.consumer.ConsumerConnector;
import kafka.message.MessageAndMetadata;

import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

/**
 * zkCli commands to rewind this group's committed offsets, for example:
 *   set /consumers/testgroup2/offsets/test/0 14000
 *   set /consumers/testgroup2/offsets/test/1 14000
 *   set /consumers/testgroup2/offsets/test/2 14000
 */
public class HighLevelConsumer {


    public static void main(String[] args) throws Exception {

        Properties props = new Properties();
        props.put("zookeeper.connect", "192.168.1.115:2181,192.168.1.116:2181,192.168.1.117:2181");
        props.put("group.id", "last");
        props.put("zookeeper.session.timeout.ms", "4000");
        props.put("zookeeper.sync.time.ms", "200");
        props.put("auto.commit.interval.ms", "5000");
        props.put("auto.offset.reset", "largest");//largest,smallest
        props.put("auto.commit.enable", "true");

        ConsumerConfig config = new ConsumerConfig(props);
        ConsumerConnector javaConsumerConnector =
                Consumer.createJavaConsumerConnector(config);

        Map<String, Integer> topicCountMap = new HashMap<String, Integer>();
        String topic = "test";
        topicCountMap.put(topic,3);
        Map<String, List<KafkaStream<byte[], byte[]>>> messageStreams =
                javaConsumerConnector.createMessageStreams(topicCountMap);

        List<KafkaStream<byte[], byte[]>> kafkaStreams = messageStreams.get(topic);
        for(KafkaStream<byte[], byte[]> kafkaStream:kafkaStreams){
            new Thread(new ReadDataHigh(kafkaStream)).start();
        }

//        Thread.sleep(12000);
//        javaConsumerConnector.shutdown();
//        System.out.println("------------------------------------------------------------");


    }
}


class ReadDataHigh implements Runnable{

    KafkaStream<byte[], byte[]> kafkaStream;

    public ReadDataHigh(KafkaStream<byte[], byte[]> kafkaStream) {
        this.kafkaStream = kafkaStream;
    }

    public void run() {
        ConsumerIterator<byte[], byte[]> iterator = kafkaStream.iterator();
        System.out.println(Thread.currentThread().getName()+"==============================");
        while(iterator.hasNext()){
            try {
                Thread.sleep(1000);
            } catch (InterruptedException e) {
                e.printStackTrace();
            }
            MessageAndMetadata<byte[], byte[]> next = iterator.next();
            int partition = next.partition();
            long offset = next.offset();
            byte[] message = next.message();
            System.out.println(Thread.currentThread().getName()+"\t"
                    +partition+"\t"+offset+"\t"+new String(message));
        }
    }
}


 

package com.csdn.kafka.consumer;


import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;

import java.util.*;

public class NewConsumer {

    public static void main(String[] args) throws InterruptedException {

//        autoOffset();
        manualOffset();

//        int partition = Math.abs("testasdf1".hashCode()) % 50 ;
//        System.out.println(partition);


    }

    private static void manualOffset() {
        Properties props = new Properties();
        props.put("bootstrap.servers", "192.168.1.115:9092,192.168.1.116:9092,192.168.1.117:9092");
        props.put("group.id", "testasdf1");
        props.put("enable.auto.commit", "false");
        props.put("auto.commit.interval.ms", "1000");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("auto.offset.reset","earliest");//默认是lastest; latest, earliest
        KafkaConsumer<String,String> kafkaConsumer = new KafkaConsumer<String, String>(props);
        String topic = "test";
//        kafkaConsumer.subscribe(Collections.singletonList(topic));

        // Assign partitions manually (instead of subscribe) so each one can be seeked to a specific offset.
        TopicPartition topicPartition0 = new TopicPartition(topic, 0);
        TopicPartition topicPartition1 = new TopicPartition(topic, 1);
        TopicPartition topicPartition2 = new TopicPartition(topic, 2);
        kafkaConsumer.assign(Arrays.asList(topicPartition0, topicPartition1, topicPartition2));
        kafkaConsumer.seek(topicPartition0, 21000);
        kafkaConsumer.seek(topicPartition1, 21000);
        kafkaConsumer.seek(topicPartition2, 21000);

        int count = 100; // commit threshold: commit offsets manually after every ~100 records
        List<String> values = new ArrayList<String>();
        while(true){
            ConsumerRecords<String, String> records = kafkaConsumer.poll(100);
            for(ConsumerRecord<String,String> record:records){
                int partition = record.partition();
                long offset = record.offset();
                String value = record.value();
                values.add(value);
                System.out.println(partition+"\t"+offset+"\t"+value);
            }

            if(values.size()>count){
                kafkaConsumer.commitSync();
                values.clear();
                System.out.println("manual commit offset");
            }
        }
    }

    private static void autoOffset() {
        Properties props = new Properties();
        props.put("bootstrap.servers", "192.168.1.115:9092,192.168.1.116:9092,192.168.1.117:9092");
        props.put("group.id", "testasdfg");
        props.put("enable.auto.commit", "true");
        props.put("auto.commit.interval.ms", "1000");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("auto.offset.reset","earliest");//默认是lastest; latest, earliest
        KafkaConsumer<String,String> kafkaConsumer = new KafkaConsumer<String, String>(props);
        String topic = "test";
        kafkaConsumer.subscribe(Collections.singletonList(topic));
        while(true){
            ConsumerRecords<String, String> records = kafkaConsumer.poll(100);
            for(ConsumerRecord<String,String> record:records){
                int partition = record.partition();
                long offset = record.offset();
                String value = record.value();
                System.out.println(partition+"\t"+offset+"\t"+value);
            }
        }
    }


}


Related Resources

📍 Differences and connections between Kafka's high-level and low-level APIs
https://blog.csdn.net/u011750989/article/details/81055891?ops_request_misc=&request_id=&biz_id=102&utm_term=kafka低阶与高阶api&utm_medium=distribute.pc_search_result.none-task-blog-2allsobaiduweb~default-1-81055891
📍 Kafka Consumer <high-level & low-level>
https://www.cnblogs.com/fxjwind/p/3794255.html

✏️ Additional Notes


Choosing the auto.offset.reset Parameter
auto.offset.reset (old consumer: largest | smallest; new consumer: earliest | latest | none)

largest
Setting auto.offset.reset to largest means consumption restarts from the largest (latest) offset.
Drawback of largest: some messages may never be consumed. For example, if the consumer last processed offset=202 but messages at offset=203 and offset=204 already exist and have not been consumed, resetting to largest makes the consumer treat offset=204 as already consumed and start from offset=205, so 203 and 204 are skipped.
Result: missed messages.

smallest
Drawbacks of smallest: first, it can cause duplicate consumption; second, and most painful, when the broker holds a lot of messages, starting from the smallest offset creates a large backlog, so new messages may not be consumed for a long time.
Result: duplicate consumption.
Which value to use depends on your specific situation. For the old consumer this parameter defaults to largest.

earliest
If a partition has a committed offset, consumption starts from that offset; if there is no committed offset, consumption starts from the beginning of the partition.

latest
If a partition has a committed offset, consumption starts from that offset; if there is no committed offset, only newly produced messages in that partition are consumed.

none
If every partition of the topic has a committed offset, consumption starts after those offsets; if even one partition has no committed offset, an exception is thrown (see the sketch below).
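A minimal sketch of the none behavior with the new consumer (the class name OffsetResetNoneDemo and the group id reset-demo are hypothetical; the brokers and topic are the ones used in the NewConsumer listing above):

package com.csdn.kafka.consumer;

import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.NoOffsetForPartitionException;

import java.util.Collections;
import java.util.Properties;

// With auto.offset.reset=none the consumer throws instead of silently resetting
// when the group has no committed offset for an assigned partition.
public class OffsetResetNoneDemo {

    public static void main(String[] args) {
        Properties props = new Properties();
        props.put("bootstrap.servers", "192.168.1.115:9092,192.168.1.116:9092,192.168.1.117:9092");
        props.put("group.id", "reset-demo"); // assumed: a brand-new group with no committed offsets
        props.put("enable.auto.commit", "false");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("auto.offset.reset", "none"); // fail fast instead of resetting

        KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(props);
        consumer.subscribe(Collections.singletonList("test"));
        try {
            consumer.poll(100);
        } catch (NoOffsetForPartitionException e) {
            // No committed offset and no reset policy to fall back on: choose a starting
            // point explicitly, e.g. jump to the end of every assigned partition.
            consumer.seekToEnd(consumer.assignment());
        }
        consumer.close();
    }
}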

Related Resources

📍 Explanation of auto.offset.reset (largest)
https://blog.csdn.net/linux_ja/article/details/43201501?utm_medium=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-9&depth_1-utm_source=distribute.pc_relevant.none-task-blog-BlogCommendFromBaidu-9
📍 Detailed explanation of Kafka auto.offset.reset values
https://blog.csdn.net/lishuangzhe7047/article/details/74530417/
 
