Basic usage of the Kafka API
Producer
package com.chang;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.io.IOException;
import java.util.Properties;

/**
 * Produces data into a Kafka topic.
 *
 * The entry point on the producer side is the Producer interface,
 * whose only implementation is KafkaProducer.
 */
public class Myproducer {
    public static void main(String[] args) {
        Producer<Integer, String> producer = null;
        Properties properties = new Properties();
        try {
            // Load the configuration file from the classpath
            properties.load(Myproducer.class.getClassLoader().getResourceAsStream("producer.properties"));
            producer = new KafkaProducer<>(properties);
            // Send data
            ProducerRecord<Integer, String> record = null;
            for (int i = 90; i < 100; i++) {
                // Argument 1 is the target topic, argument 2 the key, argument 3 the value
                record = new ProducerRecord<>("hadoop", i, i + "hello nihao ");
                producer.send(record);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release resources (close() also flushes any buffered records)
            if (producer != null) {
                producer.close();
            }
        }
    }
}
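Since KafkaProducer implements Closeable, the close-in-finally pattern above can also be written with try-with-resources (Java 7+). A minimal sketch of the same send loop (the enclosing method must declare or catch IOException):

// Minimal sketch: the same send loop with try-with-resources,
// which calls producer.close() automatically on success or failure.
Properties properties = new Properties();
properties.load(Myproducer.class.getClassLoader().getResourceAsStream("producer.properties"));
try (Producer<Integer, String> producer = new KafkaProducer<>(properties)) {
    for (int i = 90; i < 100; i++) {
        producer.send(new ProducerRecord<>("hadoop", i, i + "hello nihao "));
    }
}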
Configuration file (producer.properties)
############################# Producer Basics #############################
# list of brokers used for bootstrapping knowledge about the rest of the cluster
# format: host1:port1,host2:port2 ...
bootstrap.servers=hadoop01:9092,hadoop02:9092,hadoop03:9092
# specify the compression codec for all data generated: none, gzip, snappy, lz4
compression.type=none
# name of the partitioner class for partitioning events; default partition spreads data randomly
# fully qualified class name of a custom partitioner
#partitioner.class=com.chang.HashPartitioner
#partitioner.class=com.chang.RoundRobinPartitioner
#partitioner.class=com.chang.RandomPartitioner
# the maximum amount of time the client will wait for the response of a request
#request.timeout.ms=
# maximum time send() may block, e.g. while the producer buffer is full
#max.block.ms=
# how long to delay before sending a batch, so more records can accumulate
linger.ms=5000
# maximum size in bytes of a single request
#max.request.size=
# size of the per-partition batch buffer
#batch.size=
# total memory available to the producer for buffering
#buffer.memory=
# serializer for keys
key.serializer=org.apache.kafka.common.serialization.IntegerSerializer
# serializer for values; the producer throws an error if these two settings are missing
value.serializer=org.apache.kafka.common.serialization.StringSerializer
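The same settings can also be supplied in code rather than via a properties file. A minimal sketch using the ProducerConfig constants (the broker list below is assumed to match the cluster above):

import org.apache.kafka.clients.producer.ProducerConfig;

// Minimal sketch: build the producer configuration in code
// instead of loading producer.properties from the classpath.
Properties props = new Properties();
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "hadoop01:9092,hadoop02:9092,hadoop03:9092");
props.put(ProducerConfig.LINGER_MS_CONFIG, "5000");
props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
        "org.apache.kafka.common.serialization.IntegerSerializer");
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
        "org.apache.kafka.common.serialization.StringSerializer");
Producer<Integer, String> producer = new KafkaProducer<>(props);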
Consumer
package com.chang;

import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

public class Myconsumer {
    public static void main(String[] args) {
        Consumer<Integer, String> consumer = null;
        Properties properties = new Properties();
        try {
            // Load the configuration file from the classpath
            properties.load(Myconsumer.class.getClassLoader().getResourceAsStream("consumer.properties"));
            consumer = new KafkaConsumer<>(properties);
            /*
             * Subscribe to the topic. subscribe(Collection<String> topics)
             * takes a collection, so the topic name is wrapped in a list.
             */
            List<String> list = new ArrayList<>();
            list.add("hadoop");
            consumer.subscribe(list);
            // Receive in a loop
            while (true) {
                // poll blocks for up to the given timeout (ms) waiting for records
                ConsumerRecords<Integer, String> records = consumer.poll(1000);
                for (ConsumerRecord<Integer, String> record : records) {
                    // Read the message attributes and print them
                    String topic = record.topic();
                    int partition = record.partition();
                    Integer key = record.key();
                    String value = record.value();
                    long offset = record.offset();
                    System.out.println(String.format("topic:%s\t, partition:%d\t, key:%d\t, value:%s\t, offset:%d",
                            topic, partition, key, value, offset));
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Never reached while the loop above runs forever
            // consumer.close();
        }
    }
}
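Note that in kafka-clients 2.0 and later the poll(long) overload used above is deprecated in favor of poll(Duration). A minimal sketch of the updated loop, assuming a 2.0+ client (requires importing java.time.Duration):

// Minimal sketch, assuming kafka-clients 2.0+, where poll(long)
// is deprecated in favor of poll(Duration).
while (true) {
    ConsumerRecords<Integer, String> records = consumer.poll(Duration.ofMillis(1000));
    for (ConsumerRecord<Integer, String> record : records) {
        System.out.printf("topic:%s, partition:%d, key:%d, value:%s, offset:%d%n",
                record.topic(), record.partition(), record.key(), record.value(), record.offset());
    }
}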
Configuration file (consumer.properties)
# see org.apache.kafka.clients.consumer.ConsumerConfig for more details
# list of brokers used for bootstrapping knowledge about the rest of the cluster
# format: host1:port1,host2:port2 ...
bootstrap.servers=hadoop01:9092,hadoop02:9092,hadoop03:9092
# consumer group id
group.id=myconsumer
# where to start consuming: latest (from the newest offsets), earliest (from the oldest offsets)
# default: latest
auto.offset.reset=earliest
# deserializers
# deserializer for keys
key.deserializer=org.apache.kafka.common.serialization.IntegerDeserializer
# deserializer for values; the consumer throws an error if these two settings are missing
value.deserializer=org.apache.kafka.common.serialization.StringDeserializer
Custom partitioners
From the ProducerRecord source documentation:
If a valid partition number is specified that partition will be used when sending the record. If no partition is
specified but a key is present a partition will be chosen using a hash of the key. If neither key nor partition is
present a partition will be assigned in a round-robin fashion.
So Kafka chooses a partition for a message using three strategies, illustrated in the sketch after this list:
1. If a partition is specified, the record goes directly to that partition.
2. If no partition is specified but a key is present, the partition is found with hash(key) % partitionCount.
3. If neither is specified, records are assigned to partitions in round-robin fashion.
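These three cases map directly onto the ProducerRecord constructors; a minimal sketch (topic "hadoop" reused from the example above, key and value types as in the producer):

// Strategy 1: explicit partition; the record goes straight to partition 0
new ProducerRecord<>("hadoop", 0, 1, "value");
// Strategy 2: no partition but a key; the partition is chosen from hash(key)
new ProducerRecord<>("hadoop", 1, "value");
// Strategy 3: neither partition nor key; partitions are assigned round-robin
new ProducerRecord<>("hadoop", "value");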
If the default partitioning strategy does not meet your needs, you can implement a custom partitioner. A few simple implementations follow.
Custom hash partitioner
package com.chang;

import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;

import java.util.Map;

public class HashPartitioner implements Partitioner {
    /**
     * @param topic      target topic
     * @param key        the key
     * @param keyBytes   the key as a byte array
     * @param value      the value
     * @param valueBytes the value as a byte array
     * @param cluster    cluster metadata
     * @return the partition the record is assigned to
     */
    @Override
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        if (key == null) {
            return 0;
        }
        // Number of partitions of the target topic
        Integer partition_num = cluster.partitionCountForTopic(topic);
        // Mask the hash to a non-negative value (Math.abs(Integer.MIN_VALUE) is still negative)
        int key_hash = key.hashCode() & Integer.MAX_VALUE;
        return key_hash % partition_num;
    }

    @Override
    public void close() {
    }

    @Override
    public void configure(Map<String, ?> configs) {
    }
}
Round-robin partitioner
package com.chang;

import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;

import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Assigns partitions in round-robin order.
 */
public class RoundRobinPartitioner implements Partitioner {
    // Thread-safe counter
    private final AtomicInteger counter = new AtomicInteger();

    @Override
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        // Number of partitions of the target topic
        Integer partition_num = cluster.partitionCountForTopic(topic);
        // The mask keeps the result non-negative after the counter overflows
        return (counter.getAndIncrement() & Integer.MAX_VALUE) % partition_num;
    }

    @Override
    public void close() {
    }

    @Override
    public void configure(Map<String, ?> configs) {
    }
}
Random partitioner
package com.chang;

import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;

import java.util.Map;
import java.util.Random;

/**
 * Assigns partitions at random.
 */
public class RandomPartitioner implements Partitioner {
    private final Random random = new Random();

    @Override
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        // Pick a random partition in [0, partitionCount)
        Integer partition_num = cluster.partitionCountForTopic(topic);
        return random.nextInt(partition_num);
    }

    @Override
    public void close() {
    }

    @Override
    public void configure(Map<String, ?> configs) {
    }
}
Note: after implementing a partitioner, you must register its fully qualified class name in the producer configuration, as shown below.
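For example, in producer.properties (only one partitioner.class line may be active at a time):

partitioner.class=com.chang.RoundRobinPartitioner

Or equivalently in code, a minimal sketch using the ProducerConfig constant:

props.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, RoundRobinPartitioner.class.getName());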