Basic usage of the Kafka API
Producer
package com.chang;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;

import java.io.IOException;
import java.util.Properties;

/**
 * Produces data into a Kafka topic.
 *
 * The entry point on the producer side is the Producer interface,
 * whose only implementation is KafkaProducer.
 */
public class Myproducer {
    public static void main(String[] args) {
        Producer<Integer, String> producer = null;
        Properties properties = new Properties();
        try {
            // Load the configuration file from the classpath
            properties.load(Myproducer.class.getClassLoader().getResourceAsStream("producer.properties"));
            producer = new KafkaProducer<>(properties);
            // Send data
            ProducerRecord<Integer, String> record = null;
            for (int i = 90; i < 100; i++) {
                // Argument 1 is the target topic, argument 2 the key, argument 3 the value
                record = new ProducerRecord<>("hadoop", i, i + "hello nihao ");
                producer.send(record);
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release resources (close() also flushes any buffered records)
            if (producer != null) {
                producer.close();
            }
        }
    }
}
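Since KafkaProducer implements Closeable, the close-in-finally pattern above can also be written with try-with-resources (Java 7+). A minimal sketch of the same send loop (the enclosing method must declare or catch IOException):

// Minimal sketch: the same send loop with try-with-resources,
// which calls producer.close() automatically on success or failure.
Properties properties = new Properties();
properties.load(Myproducer.class.getClassLoader().getResourceAsStream("producer.properties"));
try (Producer<Integer, String> producer = new KafkaProducer<>(properties)) {
    for (int i = 90; i < 100; i++) {
        producer.send(new ProducerRecord<>("hadoop", i, i + "hello nihao "));
    }
}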
Configuration file (producer.properties)
############################# Producer Basics #############################
# list of brokers used for bootstrapping knowledge about the rest of the cluster
# format: host1:port1,host2:port2 ...
bootstrap.servers=hadoop01:9092,hadoop02:9092,hadoop03:9092
# specify the compression codec for all data generated: none, gzip, snappy, lz4
compression.type=none
# name of the partitioner class for partitioning events; default partition spreads data randomly
# fully qualified class name of a custom partitioner
#partitioner.class=com.chang.HashPartitioner
#partitioner.class=com.chang.RoundRobinPartitioner
#partitioner.class=com.chang.RandomPartitioner
# the maximum amount of time the client will wait for the response of a request
#request.timeout.ms=
# maximum time send() may block, e.g. while the producer buffer is full
#max.block.ms=
# how long to delay before sending a batch, so more records can accumulate
linger.ms=5000
# maximum size in bytes of a single request
#max.request.size=
# size of the per-partition batch buffer
#batch.size=
# total memory available to the producer for buffering
#buffer.memory=
# serializer for keys
key.serializer=org.apache.kafka.common.serialization.IntegerSerializer
# serializer for values; the producer throws an error if these two settings are missing
value.serializer=org.apache.kafka.common.serialization.StringSerializer
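The same settings can also be supplied in code rather than via a properties file. A minimal sketch using the ProducerConfig constants (the broker list below is assumed to match the cluster above):

import org.apache.kafka.clients.producer.ProducerConfig;

// Minimal sketch: build the producer configuration in code
// instead of loading producer.properties from the classpath.
Properties props = new Properties();
props.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "hadoop01:9092,hadoop02:9092,hadoop03:9092");
props.put(ProducerConfig.LINGER_MS_CONFIG, "5000");
props.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,
        "org.apache.kafka.common.serialization.IntegerSerializer");
props.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,
        "org.apache.kafka.common.serialization.StringSerializer");
Producer<Integer, String> producer = new KafkaProducer<>(props);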
Consumer
package com.chang;

import org.apache.kafka.clients.consumer.Consumer;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;

import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

public class Myconsumer {
    public static void main(String[] args) {
        Consumer<Integer, String> consumer = null;
        Properties properties = new Properties();
        try {
            // Load the configuration file from the classpath
            properties.load(Myconsumer.class.getClassLoader().getResourceAsStream("consumer.properties"));
            consumer = new KafkaConsumer<>(properties);
            /*
             * Subscribe to the topic. subscribe(Collection<String> topics)
             * takes a collection, so the topic name is wrapped in a list.
             */
            List<String> list = new ArrayList<>();
            list.add("hadoop");
            consumer.subscribe(list);
            // Receive in a loop
            while (true) {
                // poll blocks for up to the given timeout (ms) waiting for records
                ConsumerRecords<Integer, String> records = consumer.poll(1000);
                for (ConsumerRecord<Integer, String> record : records) {
                    // Read the message attributes and print them
                    String topic = record.topic();
                    int partition = record.partition();
                    Integer key = record.key();
                    String value = record.value();
                    long offset = record.offset();
                    System.out.println(String.format("topic:%s\t, partition:%d\t, key:%d\t, value:%s\t, offset:%d",
                            topic, partition, key, value, offset));
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Never reached while the loop above runs forever
            // consumer.close();
        }
    }
}
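Note that in kafka-clients 2.0 and later the poll(long) overload used above is deprecated in favor of poll(Duration). A minimal sketch of the updated loop, assuming a 2.0+ client (requires importing java.time.Duration):

// Minimal sketch, assuming kafka-clients 2.0+, where poll(long)
// is deprecated in favor of poll(Duration).
while (true) {
    ConsumerRecords<Integer, String> records = consumer.poll(Duration.ofMillis(1000));
    for (ConsumerRecord<Integer, String> record : records) {
        System.out.printf("topic:%s, partition:%d, key:%d, value:%s, offset:%d%n",
                record.topic(), record.partition(), record.key(), record.value(), record.offset());
    }
}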
Configuration file (consumer.properties)
# see org.apache.kafka.clients.consumer.ConsumerConfig for more details
# list of brokers used for bootstrapping knowledge about the rest of the cluster
# format: host1:port1,host2:port2 ...
bootstrap.servers=hadoop01:9092,hadoop02:9092,hadoop03:9092
# consumer group id
group.id=myconsumer
# where to start consuming: latest (from the newest offsets), earliest (from the oldest offsets)
# default: latest
auto.offset.reset=earliest
# deserializers
# deserializer for keys
key.deserializer=org.apache.kafka.common.serialization.IntegerDeserializer
# deserializer for values; the consumer throws an error if these two settings are missing
value.deserializer=org.apache.kafka.common.serialization.StringDeserializer
Custom partitioners
From the ProducerRecord source documentation:
If a valid partition number is specified that partition will be used when sending the record. If no partition is
specified but a key is present a partition will be chosen using a hash of the key. If neither key nor partition is
present a partition will be assigned in a round-robin fashion.
So Kafka chooses a partition for a message using three strategies, illustrated in the sketch after this list:
1. If a partition is specified, the record goes directly to that partition.
2. If no partition is specified but a key is present, the partition is found with hash(key) % partitionCount.
3. If neither is specified, records are assigned to partitions in round-robin fashion.
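These three cases map directly onto the ProducerRecord constructors; a minimal sketch (topic "hadoop" reused from the example above, key and value types as in the producer):

// Strategy 1: explicit partition; the record goes straight to partition 0
new ProducerRecord<>("hadoop", 0, 1, "value");
// Strategy 2: no partition but a key; the partition is chosen from hash(key)
new ProducerRecord<>("hadoop", 1, "value");
// Strategy 3: neither partition nor key; partitions are assigned round-robin
new ProducerRecord<>("hadoop", "value");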
If the default partitioning strategy does not meet your needs, you can implement a custom partitioner. A few simple implementations follow.
Custom hash partitioner
package com.chang;

import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;

import java.util.Map;

public class HashPartitioner implements Partitioner {
    /**
     * @param topic      target topic
     * @param key        the key
     * @param keyBytes   the key as a byte array
     * @param value      the value
     * @param valueBytes the value as a byte array
     * @param cluster    cluster metadata
     * @return the partition the record is assigned to
     */
    @Override
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        if (key == null) {
            return 0;
        }
        // Number of partitions of the target topic
        Integer partition_num = cluster.partitionCountForTopic(topic);
        // Mask the hash to a non-negative value (Math.abs(Integer.MIN_VALUE) is still negative)
        int key_hash = key.hashCode() & Integer.MAX_VALUE;
        return key_hash % partition_num;
    }

    @Override
    public void close() {
    }

    @Override
    public void configure(Map<String, ?> configs) {
    }
}
Round-robin partitioner
package com.chang;

import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;

import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

/**
 * Assigns partitions in round-robin order.
 */
public class RoundRobinPartitioner implements Partitioner {
    // Thread-safe counter
    private final AtomicInteger counter = new AtomicInteger();

    @Override
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        // Number of partitions of the target topic
        Integer partition_num = cluster.partitionCountForTopic(topic);
        // The mask keeps the result non-negative after the counter overflows
        return (counter.getAndIncrement() & Integer.MAX_VALUE) % partition_num;
    }

    @Override
    public void close() {
    }

    @Override
    public void configure(Map<String, ?> configs) {
    }
}
Random partitioner
package com.chang;

import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;

import java.util.Map;
import java.util.Random;

/**
 * Assigns partitions at random.
 */
public class RandomPartitioner implements Partitioner {
    private final Random random = new Random();

    @Override
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        // Pick a random partition in [0, partitionCount)
        Integer partition_num = cluster.partitionCountForTopic(topic);
        return random.nextInt(partition_num);
    }

    @Override
    public void close() {
    }

    @Override
    public void configure(Map<String, ?> configs) {
    }
}
Note: after implementing a partitioner, you must register its fully qualified class name in the producer configuration, as shown below.
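For example, in producer.properties (only one partitioner.class line may be active at a time):

partitioner.class=com.chang.RoundRobinPartitioner

Or equivalently in code, a minimal sketch using the ProducerConfig constant:

props.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, RoundRobinPartitioner.class.getName());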