Business Scenario
In the IoT domain, sensor devices continuously send data to a server. The original plan was to give each type of data its own topic, but in practice Kafka throughput degrades sharply as the number of topics grows. The approach taken here is therefore to keep a single topic with a custom partitioner, and to have each consumer in the consumer group consume one designated partition.
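The examples below assume the topic already exists with three partitions, since the producer's keys target partition indices 0-2. A minimal sketch for creating it with Kafka's AdminClient (the broker list is taken from the code below; the replication factor of 2 is an assumption):

import java.util.Collections;
import java.util.Properties;

import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.NewTopic;

public class CreateTopic {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put("bootstrap.servers", "192.168.1.109:9092,192.168.1.111:9092,192.168.1.112:9092");
        try (AdminClient admin = AdminClient.create(props)) {
            // 3 partitions to match the key suffixes 0..2; replication factor 2 is an assumption
            NewTopic topic = new NewTopic("TOPIC_SPARKSTREAMING_part", 3, (short) 2);
            admin.createTopics(Collections.singleton(topic)).all().get();
        }
    }
}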
1. Custom Kafka Partitioner
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.PartitionInfo;

public class SimplePartitioner implements Partitioner {

    private final AtomicInteger atomicInteger = new AtomicInteger(0);

    @Override
    public void configure(Map<String, ?> configs) {
        // no configuration needed
    }

    @Override
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        // Fetch all partitions of this topic from the cluster metadata
        List<PartitionInfo> partitions = cluster.partitionsForTopic(topic);
        int numPartitions = partitions.size();
        // If the key is null or empty, fall back to round-robin for load balancing
        if (null == keyBytes || keyBytes.length < 1) {
            return Math.floorMod(atomicInteger.getAndIncrement(), numPartitions);
        }
        // Keys follow the pattern "<prefix>_<n>": the number after "_" selects the
        // partition, e.g. "xx_1" goes to partition 1. The modulo guards against
        // suffixes that exceed the partition count.
        String strKey = key.toString();
        return Integer.parseInt(strKey.split("_")[1]) % numPartitions;
    }

    @Override
    public void close() {
        // nothing to release
    }
}
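The routing rule can be sanity-checked on its own (a quick demo, not part of the original post; the helper below just mirrors SimplePartitioner's key logic):

public class PartitionKeyDemo {
    // Hypothetical helper mirroring SimplePartitioner's key rule, for illustration only
    static int partitionFor(String key, int numPartitions) {
        return Integer.parseInt(key.split("_")[1]) % numPartitions;
    }

    public static void main(String[] args) {
        int numPartitions = 3; // assumes the topic has 3 partitions
        System.out.println(partitionFor("zsli5_0", numPartitions)); // prints 0
        System.out.println(partitionFor("zsli6_1", numPartitions)); // prints 1
        System.out.println(partitionFor("zsli7_2", numPartitions)); // prints 2
    }
}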
2. Producer: Writing Data to a Designated Partition
import java.io.Serializable;
import java.util.Properties;
import java.util.Random;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class TestPartitionProducer implements Serializable {

    private final KafkaProducer<String, String> kafkaProducer;
    // Must match the topic the consumers assign themselves to (the original used
    // "TOPIC_SPARKSTREAMING_part1" here, which the consumers would never read)
    public final static String TOPIC_SPARKSTREAMING = "TOPIC_SPARKSTREAMING_part";
    private final Random random = new Random();

    /**
     * Initialization (instance initializer block)
     */
    {
        // 1. Build the producer configuration
        Properties properties = new Properties();
        properties.put("acks", "1");
        // ☆☆☆ Register our custom partitioner as the partitioning strategy
        properties.put("partitioner.class", "com.dahai.kafka.producer.SimplePartitioner");
        // Key serializer
        properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        // Value serializer
        properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        // Kafka brokers, in host1:port1,host2:port2 format
        properties.put("bootstrap.servers", "192.168.1.109:9092,192.168.1.111:9092,192.168.1.112:9092");
        // 2. Create the producer
        kafkaProducer = new KafkaProducer<>(properties);
    }

    /**
     * Publish messages with generated keys and values.
     */
    private void sendData() {
        // 3. Publish messages
        int counter = 0;
        String[] str = new String[]{"lj:", "xf:", "other:"};
        String value;
        while (true) {
            if (counter % 3 == 0) {
                value = str[random.nextInt(3)] + random.nextInt(2);
            } else {
                value = "other:" + random.nextInt(2);
            }
            // Design the key so the digit after "_" is the target partition id;
            // it must stay below the partition count the topic was created with.
            String key = "zsli" + counter + "_" + random.nextInt(3);
            /*
             * Which partition does a message go to? A message consists of a key
             * and a value. If the key is null or empty (best avoided),
             * SimplePartitioner falls back to round-robin; otherwise the digit
             * after the "_" in the key is taken as the partition id.
             */
            ProducerRecord<String, String> record = new ProducerRecord<String, String>(TOPIC_SPARKSTREAMING, key, value);
            kafkaProducer.send(record);
            System.out.println("produced " + value + " ---" + System.currentTimeMillis());
            // Pause for 1 second every 2 messages
            if (0 == counter % 2) {
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
            counter++;
        }
    }

    /**
     * Main method
     */
    public static void main(String[] args) {
        final TestPartitionProducer producer = new TestPartitionProducer();
        new Thread(new Runnable() {
            @Override
            public void run() {
                producer.sendData();
            }
        }).start();
    }
}
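To verify where each record actually lands, send() also accepts a callback (a sketch, not in the original post; it reuses the kafkaProducer and record variables from sendData() above), and the resulting RecordMetadata reports the chosen partition and offset:

kafkaProducer.send(record, new org.apache.kafka.clients.producer.Callback() {
    @Override
    public void onCompletion(org.apache.kafka.clients.producer.RecordMetadata metadata, Exception exception) {
        if (exception != null) {
            exception.printStackTrace();
        } else {
            // confirms the partition chosen by SimplePartitioner
            System.out.println("sent to partition " + metadata.partition() + ", offset " + metadata.offset());
        }
    }
});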
3. Consumers in the Group Each Consume a Designated Partition
consumer1
import java.time.Duration;
import java.util.Collections;
import java.util.Properties;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;

public class JavaConsumer01 {

    public final static String TOPIC_SPARKSTREAMING = "TOPIC_SPARKSTREAMING_part";

    public static void main(String[] args) {
        // Consumer configuration: deserializers, brokers, group name, offset handling
        Properties properties = new Properties();
        properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put("bootstrap.servers", "192.168.1.109:9092,192.168.1.111:9092,192.168.1.112:9092");
        properties.put("group.id", "demo");
        properties.put("enable.auto.commit", "true");
        properties.put("auto.offset.reset", "earliest");
        properties.put("auto.commit.interval.ms", "1000");
        // Create a consumer client instance
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
        // Manually assign partition 0 of the topic; assign() bypasses group rebalancing
        consumer.assign(Collections.singletonList(new TopicPartition(TOPIC_SPARKSTREAMING, 0)));
        // Poll loop
        while (true) {
            // poll(Duration) requires kafka-clients 2.0+; older clients only have poll(long)
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
            for (ConsumerRecord<String, String> record : records) {
                System.out.printf("partition = %d, offset = %d, value = %s", record.partition(), record.offset(), record.value());
                System.out.println(" ---" + System.currentTimeMillis());
            }
        }
    }
}
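assign() does not validate the partition index, so a wrong index silently yields a consumer that never receives data. A small sketch (reusing the consumer and constant from JavaConsumer01) that checks the index against the topic metadata first:

// Validate the requested partition before assigning it
int wanted = 0; // the partition this instance should own
int available = consumer.partitionsFor(TOPIC_SPARKSTREAMING).size();
if (wanted >= available) {
    throw new IllegalArgumentException("topic has only " + available + " partitions, cannot assign " + wanted);
}
consumer.assign(Collections.singletonList(new TopicPartition(TOPIC_SPARKSTREAMING, wanted)));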
consumer2
JavaConsumer02 is identical to JavaConsumer01 except for the class name and the partition it assigns itself:

consumer.assign(Collections.singletonList(new TopicPartition(TOPIC_SPARKSTREAMING, 1)));
consumer3
JavaConsumer03 again differs only in the assigned partition. Note that the producer's keys end in nextInt(3), i.e. 0, 1 or 2, so with three partitions the valid indices are 0-2; the original code assigned partition 3, which does not exist, and partition 2 is what is intended:

consumer.assign(Collections.singletonList(new TopicPartition(TOPIC_SPARKSTREAMING, 2)));
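Rather than maintaining one class per partition, a single consumer could take the partition index as a command-line argument (a hypothetical variant, not in the original post; same configuration as JavaConsumer01):

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;

public class JavaConsumerN {
    public final static String TOPIC_SPARKSTREAMING = "TOPIC_SPARKSTREAMING_part";

    // usage: java JavaConsumerN 0|1|2
    public static void main(String[] args) {
        int partition = Integer.parseInt(args[0]);
        Properties properties = new Properties();
        properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put("bootstrap.servers", "192.168.1.109:9092,192.168.1.111:9092,192.168.1.112:9092");
        properties.put("group.id", "demo");
        properties.put("enable.auto.commit", "true");
        properties.put("auto.offset.reset", "earliest");
        properties.put("auto.commit.interval.ms", "1000");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
        consumer.assign(Collections.singletonList(new TopicPartition(TOPIC_SPARKSTREAMING, partition)));
        while (true) {
            // poll(Duration) requires kafka-clients 2.0+; older clients only have poll(long)
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
            for (ConsumerRecord<String, String> record : records) {
                System.out.printf("partition = %d, offset = %d, value = %s%n",
                        record.partition(), record.offset(), record.value());
            }
        }
    }
}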