kafka
实战啊,首先我们先清空下topic
删除topic
kafka-topics.sh --delete --zookeeper node1/kafka --topic ooxx
提示:
Topic ooxx is marked for deletion.
Note: This will have no impact if delete.topic.enable is not set to true.
此时的删除并不是真正的删除,而是把topic标记为删除:marked for deletion
但是我发现新版本里面zookeeper 也没有该topic,在查询list 的时候也不存在该topic
查看topic是否还存在
kafka-topics.sh --zookeeper node1/kafka --list
代码
我前面的博客没有打开外部的advertised.listeners
配置,记得在这个配置后面添加主机的外部ip地址
创建Producer
package com.example.kafka.lesson;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import org.apache.kafka.common.serialization.StringSerializer;
import org.junit.jupiter.api.Test;
import java.util.Properties;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
/**
* @author zjj
* @create 2022/3/8 15:51
* @Description:
*/
public class lesson01 {
    /**
     * Producer demo: sends 3 x 3 records to the "test-items" topic and prints
     * the partition/offset the broker assigned to each record.
     *
     * @throws ExecutionException   if the broker fails/rejects a send
     * @throws InterruptedException if interrupted while blocking on send().get()
     */
    @Test
    public void producer() throws ExecutionException, InterruptedException {
        String topic = "test-items";
        Properties properties = new Properties();
        properties.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "172.16.111.47:9092,172.16.111.45:9092");
        properties.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        properties.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        // FIX: use parameterized types instead of raw KafkaProducer/ProducerRecord,
        // and close the producer (try-with-resources) so buffered records are
        // flushed and network resources released before the test exits.
        try (KafkaProducer<String, String> kafkaProducer = new KafkaProducer<>(properties)) {
            for (int i = 0; i < 3; i++) {
                for (int j = 0; j < 3; j++) {
                    ProducerRecord<String, String> producerRecord =
                            new ProducerRecord<>(topic, "item" + j, "val" + i);
                    Future<RecordMetadata> send = kafkaProducer.send(producerRecord);
                    // Block until the broker acknowledges, so the metadata is available.
                    RecordMetadata rm = send.get();
                    int partition = rm.partition();
                    long offset = rm.offset();
                    System.out.println("key: " + producerRecord.key() + " val:" + producerRecord.value() +
                            " topic:" + topic + " partition:" + partition + " offset:" + offset);
                }
            }
        }
    }
}
consumer (自动提交)
//一个运行中的consumer,会自己维护自己的消费进度
//一旦开启自动提交,提交是异步进行的,有两类风险:
//1,还没到提交时间就挂了,没提交,重启一个consumer,参照offset的时候,会重复消费
//2,一个批次的数据还没写数据库成功,但是这个批次的offset被异步提交了,挂了,重启一个consumer,参照offset的时候,会丢失这批消息
kafka-consumer-groups.sh --bootstrap-server server1:19092 --list
kafka-consumer-groups.sh --bootstrap-server server1:19092 --describe --group OOXX
/**
 * Consumer demo (auto-commit): subscribes to "topic-test1" and prints every
 * record's key/value/partition/offset in an endless poll loop.
 *
 * Inspect the group's committed offsets with:
 *   kafka-consumer-groups.sh --bootstrap-server node02:9092 --list
 */
@Test
public void consumer(){
    String topic = "topic-test1";
    Properties p = new Properties();
    p.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "server1:9092,server2:9092");
    p.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
    p.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
    // Consumption details.
    p.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "OOXX");
    // Kafka is both an MQ and storage: on the very first start this group has
    // no committed offset, so auto.offset.reset decides where to begin:
    //   earliest - reset to the earliest available offset
    //   latest   - reset to the log end (default; only new records are seen)
    //   none     - throw if no previous offset exists for the group
    p.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Auto-commit runs asynchronously in the background, which risks both
    // duplicate consumption (crash before the commit fires) and lost work
    // (commit fires before the batch is fully persisted downstream).
    p.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "true");
    // Commit interval defaults to 5 seconds:
    // p.setProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "15000");
    // Poll pulls data on demand; cap how many records come back per poll:
    // p.setProperty(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, "");
    KafkaConsumer<String, String> consumer = new KafkaConsumer<>(p);
    consumer.subscribe(Arrays.asList(topic));
    while (true) {
        // FIX: poll(Duration.ofMillis(0)) returns immediately and makes this
        // loop busy-spin a full CPU core; a small timeout lets the thread
        // block until records (0..n of them) arrive or the timeout elapses.
        ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(500));
        if (!records.isEmpty()) {
            System.out.println("-----------" + records.count() + "-------------");
            Iterator<ConsumerRecord<String, String>> iterator = records.iterator();
            while (iterator.hasNext()) {
                // One consumer may own several partitions, but within a group a
                // partition is consumed by at most one consumer.
                ConsumerRecord<String, String> next = iterator.next();
                // Partition this record belongs to.
                int partition = next.partition();
                // Offset of this record within the partition.
                long offset = next.offset();
                System.out.println("key: " + next.key() + " val: " + next.value() + " partition: " + partition + " offset: " + offset);
            }
        }
    }
}
消费数据
consumer 非自动提交(手动提交)
自己去维护offset 提交的粒度
/**
 * Consumer demo (manual commit, enable.auto.commit=false): offsets are
 * committed by hand at three granularities, from safest to coarsest:
 *   1. per record     - commitSync after each message
 *   2. per partition  - commitSync after each partition's slice of the poll
 *   3. per poll batch - commitAsync after the whole poll
 *
 * Inspect the group's committed offsets with:
 *   kafka-consumer-groups.sh --bootstrap-server node02:9092 --list
 */
@Test
public void consumer(){
    String topic = "topic-test1";
    Properties p = new Properties();
    p.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "server1:9092,server2:9092");
    p.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
    p.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
    // Consumption details.
    p.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "OOXX");
    // Kafka is both an MQ and storage: on the very first start this group has
    // no committed offset, so auto.offset.reset decides where to begin:
    //   earliest - reset to the earliest available offset
    //   latest   - reset to the log end (default; only new records are seen)
    //   none     - throw if no previous offset exists for the group
    p.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    // Auto-commit disabled: this demo owns the commit timing itself, avoiding
    // the duplicate/lost-message risks of background async auto-commit.
    p.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
    // Auto-commit interval (unused here, kept for reference):
    // p.setProperty(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, "15000");
    // Cap how many records come back per poll:
    // p.setProperty(ConsumerConfig.MAX_POLL_RECORDS_CONFIG, "");
    KafkaConsumer<String, String> consumer = new KafkaConsumer<>(p);
    consumer.subscribe(Arrays.asList(topic), new ConsumerRebalanceListener() {
        @Override
        public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
            System.out.println("---onPartitionsRevoked:");
            Iterator<TopicPartition> iter = partitions.iterator();
            while (iter.hasNext()) {
                System.out.println("---- " + iter.next().partition());
            }
        }
        @Override
        public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
            System.out.println("---onPartitionsAssigned:");
            Iterator<TopicPartition> iter = partitions.iterator();
            while (iter.hasNext()) {
                System.out.println("---- " + iter.next().partition());
            }
        }
    });
    while (true) {
        // FIX: poll(0) busy-spins a full CPU core; block briefly instead.
        ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(500));
        if (!records.isEmpty()) {
            System.out.println("-----------" + records.count() + "-------------");
            // Each poll may return data for several partitions.
            Set<TopicPartition> partitions = records.partitions();
            /*
             * Manual-commit granularities demonstrated below:
             *   1. per record    - sync commit after every message
             *   2. per partition - sync commit after each partition's batch
             *   3. per poll      - async commit for the whole batch
             * With multiple threads, 1 and 3 stay single-threaded; 2 is the
             * natural candidate for one worker per partition.
             */
            for (TopicPartition partition : partitions) {
                List<ConsumerRecord<String, String>> records1 = records.records(partition);
                // Within one micro-batch, process partition by partition;
                // this could also run in parallel with one thread per partition.
                Iterator<ConsumerRecord<String, String>> piter = records1.iterator();
                while (piter.hasNext()) {
                    ConsumerRecord<String, String> next = piter.next();
                    int par = next.partition();
                    long offset = next.offset();
                    String key = next.key();
                    String value = next.value();
                    long timestamp = next.timestamp();
                    System.out.println("key: " + key + " val: " + value + " partition: " + par + " offset: " + offset + "time:: " + timestamp);
                    TopicPartition sp = new TopicPartition(topic, par);
                    // FIX: the committed offset must be the offset of the NEXT
                    // record to consume (lastProcessed + 1); committing the
                    // record's own offset makes a restarted consumer
                    // re-process the last committed message.
                    OffsetAndMetadata om = new OffsetAndMetadata(offset + 1);
                    HashMap<TopicPartition, OffsetAndMetadata> map = new HashMap<>();
                    map.put(sp, om);
                    // Strategy 1: record-level sync commit — the safest;
                    // works single- or multi-threaded.
                    consumer.commitSync(map);
                }
                // Offset of the last record in this partition's batch.
                long poff = records1.get(records1.size() - 1).offset();
                // FIX: same +1 rule as above — commit the NEXT offset.
                OffsetAndMetadata pom = new OffsetAndMetadata(poff + 1);
                HashMap<TopicPartition, OffsetAndMetadata> map = new HashMap<>();
                map.put(partition, pom);
                // Strategy 2: partition-level sync commit. Kafka does not care
                // how much you processed — only what offset you report back —
                // so the whole partition batch can be processed first, as long
                // as the final offset reported is correct.
                consumer.commitSync(map);
            }
            // Strategy 3: commit the whole poll batch asynchronously.
            consumer.commitAsync();
        }
    }
    // NOTE(review): removed a large commented-out duplicate of the
    // auto-commit poll loop that previously sat here — dead code.
}