Business Scenario
In the IoT domain, sensor devices continuously send data to a server. The original plan was to give each type of data its own topic, but in practice Kafka throughput degrades sharply as the number of topics grows. The approach taken here is therefore to keep a single topic with a custom partitioner, and to have each consumer in the consumer group consume one designated partition.
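The examples below assume the topic already exists with three partitions, since the producer's keys target partition indices 0-2. A minimal sketch for creating it with Kafka's AdminClient (the broker list is taken from the code below; the replication factor of 2 is an assumption):

import java.util.Collections;
import java.util.Properties;

import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.NewTopic;

public class CreateTopic {
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put("bootstrap.servers", "192.168.1.109:9092,192.168.1.111:9092,192.168.1.112:9092");
        try (AdminClient admin = AdminClient.create(props)) {
            // 3 partitions to match the key suffixes 0..2; replication factor 2 is an assumption
            NewTopic topic = new NewTopic("TOPIC_SPARKSTREAMING_part", 3, (short) 2);
            admin.createTopics(Collections.singleton(topic)).all().get();
        }
    }
}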
1. Custom Kafka Partitioner
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicInteger;

import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.PartitionInfo;

public class SimplePartitioner implements Partitioner {

    private final AtomicInteger atomicInteger = new AtomicInteger(0);

    @Override
    public void configure(Map<String, ?> configs) {
        // no configuration needed
    }

    @Override
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        // Fetch all partitions of this topic from the cluster metadata
        List<PartitionInfo> partitions = cluster.partitionsForTopic(topic);
        int numPartitions = partitions.size();
        // If the key is null or empty, fall back to round-robin for load balancing
        if (null == keyBytes || keyBytes.length < 1) {
            return Math.floorMod(atomicInteger.getAndIncrement(), numPartitions);
        }
        // Keys follow the pattern "<prefix>_<n>": the number after "_" selects the
        // partition, e.g. "xx_1" goes to partition 1. The modulo guards against
        // suffixes that exceed the partition count.
        String strKey = key.toString();
        return Integer.parseInt(strKey.split("_")[1]) % numPartitions;
    }

    @Override
    public void close() {
        // nothing to release
    }
}
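The routing rule can be sanity-checked on its own (a quick demo, not part of the original post; the helper below just mirrors SimplePartitioner's key logic):

public class PartitionKeyDemo {
    // Hypothetical helper mirroring SimplePartitioner's key rule, for illustration only
    static int partitionFor(String key, int numPartitions) {
        return Integer.parseInt(key.split("_")[1]) % numPartitions;
    }

    public static void main(String[] args) {
        int numPartitions = 3; // assumes the topic has 3 partitions
        System.out.println(partitionFor("zsli5_0", numPartitions)); // prints 0
        System.out.println(partitionFor("zsli6_1", numPartitions)); // prints 1
        System.out.println(partitionFor("zsli7_2", numPartitions)); // prints 2
    }
}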
2. Producer: Writing Data to a Designated Partition
import java.io.Serializable;
import java.util.Properties;
import java.util.Random;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;

public class TestPartitionProducer implements Serializable {

    private final KafkaProducer<String, String> kafkaProducer;
    // Must match the topic the consumers assign themselves to (the original used
    // "TOPIC_SPARKSTREAMING_part1" here, which the consumers would never read)
    public final static String TOPIC_SPARKSTREAMING = "TOPIC_SPARKSTREAMING_part";
    private final Random random = new Random();

    /**
     * Initialization (instance initializer block)
     */
    {
        // 1. Build the producer configuration
        Properties properties = new Properties();
        properties.put("acks", "1");
        // ☆☆☆ Register our custom partitioner as the partitioning strategy
        properties.put("partitioner.class", "com.dahai.kafka.producer.SimplePartitioner");
        // Key serializer
        properties.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        // Value serializer
        properties.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        // Kafka brokers, in host1:port1,host2:port2 format
        properties.put("bootstrap.servers", "192.168.1.109:9092,192.168.1.111:9092,192.168.1.112:9092");
        // 2. Create the producer
        kafkaProducer = new KafkaProducer<>(properties);
    }

    /**
     * Publish messages with generated keys and values.
     */
    private void sendData() {
        // 3. Publish messages
        int counter = 0;
        String[] str = new String[]{"lj:", "xf:", "other:"};
        String value;
        while (true) {
            if (counter % 3 == 0) {
                value = str[random.nextInt(3)] + random.nextInt(2);
            } else {
                value = "other:" + random.nextInt(2);
            }
            // Design the key so the digit after "_" is the target partition id;
            // it must stay below the partition count the topic was created with.
            String key = "zsli" + counter + "_" + random.nextInt(3);
            /*
             * Which partition does a message go to? A message consists of a key
             * and a value. If the key is null or empty (best avoided),
             * SimplePartitioner falls back to round-robin; otherwise the digit
             * after the "_" in the key is taken as the partition id.
             */
            ProducerRecord<String, String> record = new ProducerRecord<String, String>(TOPIC_SPARKSTREAMING, key, value);
            kafkaProducer.send(record);
            System.out.println("produced " + value + " ---" + System.currentTimeMillis());
            // Pause for 1 second every 2 messages
            if (0 == counter % 2) {
                try {
                    Thread.sleep(1000);
                } catch (InterruptedException e) {
                    e.printStackTrace();
                }
            }
            counter++;
        }
    }

    /**
     * Main method
     */
    public static void main(String[] args) {
        final TestPartitionProducer producer = new TestPartitionProducer();
        new Thread(new Runnable() {
            @Override
            public void run() {
                producer.sendData();
            }
        }).start();
    }
}
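To verify where each record actually lands, send() also accepts a callback (a sketch, not in the original post; it reuses the kafkaProducer and record variables from sendData() above), and the resulting RecordMetadata reports the chosen partition and offset:

kafkaProducer.send(record, new org.apache.kafka.clients.producer.Callback() {
    @Override
    public void onCompletion(org.apache.kafka.clients.producer.RecordMetadata metadata, Exception exception) {
        if (exception != null) {
            exception.printStackTrace();
        } else {
            // confirms the partition chosen by SimplePartitioner
            System.out.println("sent to partition " + metadata.partition() + ", offset " + metadata.offset());
        }
    }
});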
3. Consumers in the Group Each Consume a Designated Partition
consumer1
import java.time.Duration;
import java.util.Collections;
import java.util.Properties;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;

public class JavaConsumer01 {

    public final static String TOPIC_SPARKSTREAMING = "TOPIC_SPARKSTREAMING_part";

    public static void main(String[] args) {
        // Consumer configuration: deserializers, brokers, group name, offset handling
        Properties properties = new Properties();
        properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put("bootstrap.servers", "192.168.1.109:9092,192.168.1.111:9092,192.168.1.112:9092");
        properties.put("group.id", "demo");
        properties.put("enable.auto.commit", "true");
        properties.put("auto.offset.reset", "earliest");
        properties.put("auto.commit.interval.ms", "1000");
        // Create a consumer client instance
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
        // Manually assign partition 0 of the topic; assign() bypasses group rebalancing
        consumer.assign(Collections.singletonList(new TopicPartition(TOPIC_SPARKSTREAMING, 0)));
        // Poll loop
        while (true) {
            // poll(Duration) requires kafka-clients 2.0+; older clients only have poll(long)
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
            for (ConsumerRecord<String, String> record : records) {
                System.out.printf("partition = %d, offset = %d, value = %s", record.partition(), record.offset(), record.value());
                System.out.println(" ---" + System.currentTimeMillis());
            }
        }
    }
}
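assign() does not validate the partition index, so a wrong index silently yields a consumer that never receives data. A small sketch (reusing the consumer and constant from JavaConsumer01) that checks the index against the topic metadata first:

// Validate the requested partition before assigning it
int wanted = 0; // the partition this instance should own
int available = consumer.partitionsFor(TOPIC_SPARKSTREAMING).size();
if (wanted >= available) {
    throw new IllegalArgumentException("topic has only " + available + " partitions, cannot assign " + wanted);
}
consumer.assign(Collections.singletonList(new TopicPartition(TOPIC_SPARKSTREAMING, wanted)));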
consumer2
JavaConsumer02 is identical to JavaConsumer01 except for the class name and the partition it assigns itself:

consumer.assign(Collections.singletonList(new TopicPartition(TOPIC_SPARKSTREAMING, 1)));
consumer3
JavaConsumer03 again differs only in the assigned partition. Note that the producer's keys end in nextInt(3), i.e. 0, 1 or 2, so with three partitions the valid indices are 0-2; the original code assigned partition 3, which does not exist, and partition 2 is what is intended:

consumer.assign(Collections.singletonList(new TopicPartition(TOPIC_SPARKSTREAMING, 2)));
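Rather than maintaining one class per partition, a single consumer could take the partition index as a command-line argument (a hypothetical variant, not in the original post; same configuration as JavaConsumer01):

import java.time.Duration;
import java.util.Collections;
import java.util.Properties;

import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;

public class JavaConsumerN {
    public final static String TOPIC_SPARKSTREAMING = "TOPIC_SPARKSTREAMING_part";

    // usage: java JavaConsumerN 0|1|2
    public static void main(String[] args) {
        int partition = Integer.parseInt(args[0]);
        Properties properties = new Properties();
        properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        properties.put("bootstrap.servers", "192.168.1.109:9092,192.168.1.111:9092,192.168.1.112:9092");
        properties.put("group.id", "demo");
        properties.put("enable.auto.commit", "true");
        properties.put("auto.offset.reset", "earliest");
        properties.put("auto.commit.interval.ms", "1000");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
        consumer.assign(Collections.singletonList(new TopicPartition(TOPIC_SPARKSTREAMING, partition)));
        while (true) {
            // poll(Duration) requires kafka-clients 2.0+; older clients only have poll(long)
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
            for (ConsumerRecord<String, String> record : records) {
                System.out.printf("partition = %d, offset = %d, value = %s%n",
                        record.partition(), record.offset(), record.value());
            }
        }
    }
}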