本次实验基于kafka 2.6.0, 更早的版本行为可能不太一样, 具体区别见下面的分析。
假设有这样一种情况:
先用一个普通的producer往topicB里写入一些数据:
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import java.util.Properties;
import java.util.concurrent.Future;
/**
 * Seeds {@code topicB} with 300 plain (non-transactional) string records.
 *
 * <p>Each record uses the same string as key and value, and every send is awaited
 * before the next one is issued, so records are written one at a time.
 */
public class Producer {
    /** Topic the seed records are written to. */
    private static final String TOPIC_NAME = "topicB";

    /** Number of records written by one run. */
    private static final int MESSAGE_COUNT = 300;

    /**
     * Connects to the broker at {@code localhost:9092} and writes the seed records.
     *
     * @param args unused
     * @throws Exception if a send fails or the waiting thread is interrupted
     */
    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");
        props.put("acks", "all");
        props.put("retries", 0);
        props.put("batch.size", 16384);
        props.put("buffer.memory", 33554432);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        // The interface is fully qualified because this class is itself named Producer.
        // try-with-resources closes the producer even when a send fails.
        try (org.apache.kafka.clients.producer.Producer<String, String> producer =
                new KafkaProducer<>(props)) {
            for (int i = 0; i < MESSAGE_COUNT; i++) {
                String content = "+++++++ " + i + " ++++++++++++";
                // Send one record and block until the send has completed.
                Future<RecordMetadata> meta =
                        producer.send(new ProducerRecord<>(TOPIC_NAME, content, content));
                meta.get();
            }
            System.out.println("Message sent successfully");
        }
    }
}
然后开启一个事务, 走consume-process-produce流程: 先从topicB消费, 再往topicA写数据并调用sendOffsetsToTransaction, 写完以后不要commit, 直接退出:
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.clients.consumer.OffsetAndMetadata;
import org.apache.kafka.clients.producer.*;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.common.TopicPartition;
import java.time.Duration;
import java.util.Arrays;
import java.util.HashMap;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.Future;
/**
 * Runs one consume-process-produce transaction and deliberately leaves it open.
 *
 * <p>The program reads a batch from {@code topicB} as group {@code my-group}, opens a
 * transaction, writes ten records to {@code topicA}, adds the consumed offsets to the
 * transaction with {@code sendOffsetsToTransaction}, and then returns without committing
 * or aborting. The point of the experiment is to observe what other consumers of the same
 * group see while that transaction is still pending.
 */
public class TransactionalProducer {
    private static final String BOOTSTRAP_SERVERS = "localhost:9092";
    private static final String SOURCE_TOPIC = "topicB";
    private static final String GROUP_ID = "my-group";

    /**
     * Entry point of the experiment.
     *
     * @param args unused
     * @throws Exception if sending a record fails or the waiting thread is interrupted
     */
    public static void main(String[] args) throws Exception {
        String topicName = "topicA";
        Producer<String, String> producer = new KafkaProducer<>(producerConfig());

        Map<TopicPartition, OffsetAndMetadata> offsets = consumeUntilRecords();
        System.out.println("consumer is finished.......");

        producer.initTransactions();
        producer.beginTransaction();
        for (int i = 0; i < 10; i++) {
            String content = "=========== " + i + " ===========";
            // Send one record and block until the send has completed.
            Future<RecordMetadata> meta =
                    producer.send(new ProducerRecord<>(topicName, content, content));
            meta.get();
        }
        producer.sendOffsetsToTransaction(offsets, GROUP_ID);
        System.out.println("Message sent successfully");

        // Intentionally no commitTransaction()/abortTransaction()/close() here: the
        // experiment needs the transaction to stay pending when the process exits.
        // NOTE(review): KafkaProducer.close() is expected to abort an open transaction,
        // which would defeat the experiment; confirm for the client version in use.
    }

    /** Builds the configuration of the transactional, idempotent producer. */
    private static Properties producerConfig() {
        Properties props = new Properties();
        props.put("bootstrap.servers", BOOTSTRAP_SERVERS);
        props.put("acks", "all");
        props.put("transactional.id", "test-transactional");
        props.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, "true");
        props.put("buffer.memory", 33554432);
        props.put("key.serializer",
                "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer",
                "org.apache.kafka.common.serialization.StringSerializer");
        return props;
    }

    /** Builds the configuration of the consumer; auto-commit is turned off. */
    private static Properties consumerConfig() {
        Properties props = new Properties();
        props.put("bootstrap.servers", BOOTSTRAP_SERVERS);
        props.put("group.id", GROUP_ID);
        props.put("enable.auto.commit", "false");
        props.put("auto.offset.reset", "earliest");
        props.put("key.deserializer",
                "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer",
                "org.apache.kafka.common.serialization.StringDeserializer");
        return props;
    }

    /**
     * Polls {@code topicB} until at least one record arrives, prints every record, and
     * returns the offsets to commit (last consumed offset + 1) for each partition seen.
     *
     * <p>The poll is repeated inside the loop; polling only once up front would spin
     * forever on the same empty batch whenever the first poll returned nothing.
     */
    private static Map<TopicPartition, OffsetAndMetadata> consumeUntilRecords() {
        Map<TopicPartition, OffsetAndMetadata> offsets = new HashMap<>();
        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(consumerConfig())) {
            consumer.subscribe(Arrays.asList(SOURCE_TOPIC));
            while (offsets.isEmpty()) {
                ConsumerRecords<String, String> records =
                        consumer.poll(Duration.ofMillis(10000));
                for (ConsumerRecord<String, String> record : records) {
                    System.out.printf("offset = %d, key = %s, value = %s%n",
                            record.offset(), record.key(), record.value());
                    // Records of one partition arrive in offset order, so the last put wins.
                    offsets.put(new TopicPartition(record.topic(), record.partition()),
                            new OffsetAndMetadata(record.offset() + 1));
                }
            }
        }
        return offsets;
    }
}
然后, 我们再用一个普通的consumer(同一个group)去消费, 看一下结果:
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import java.time.Duration;
import java.util.Arrays;
import java.util.Properties;
/**
 * Plain consumer of {@code topicB} in group {@code my-group} with auto-commit enabled.
 *
 * <p>It polls forever and prints every record it receives; it is used to observe how a
 * regular consumer of the group behaves after the transactional program has run.
 */
public class Consumer {
    /**
     * Subscribes to {@code topicB} and prints records until the process is stopped.
     *
     * @param args unused
     * @throws Exception declared for parity with the other sample programs
     */
    public static void main(String[] args) throws Exception {
        String topicName = "topicB";
        Properties props = new Properties();
        props.put("bootstrap.servers", "localhost:9092");
        props.put("group.id", "my-group");
        props.put("enable.auto.commit", "true");
        props.put("auto.commit.interval.ms", "1000");
        props.put("auto.offset.reset", "earliest");
        props.put("session.timeout.ms", "30000");
        props.put("key.deserializer",
                "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer",
                "org.apache.kafka.common.serialization.StringDeserializer");

        // The loop never ends normally; try-with-resources closes the consumer
        // if poll() throws.
        try (KafkaConsumer<String, String> consumer = new KafkaConsumer<>(props)) {
            consumer.subscribe(Arrays.asList(topicName));
            while (true) {
                ConsumerRecords<String, String> records =
                        consumer.poll(Duration.ofMillis(10000));
                for (ConsumerRecord<String, String> record : records) {
                    System.out.printf("offset = %d, key = %s, value = %s%n",
                            record.offset(), record.key(), record.value());
                }
            }
        }
    }
}
这个时候再来看, 普通的consumer无法获取到已提交的offset, 它会提示有unstable的offset,
原因可能是事务还没有提交; consumer会一直在获取offset的 do {} while 循环里重试, 不退出:
The following partitions still have unstable offsets
which are not cleared on the broker side: [topicB-0],
this could be either transactional offsets waiting for completion,
or normal offsets waiting for replication after appending to local log
具体原因可以看broker端处理OffsetFetch请求的代码: 它会判断 hasPendingOffsetCommitsForTopicPartition, 只要该分区还有未完成(pending)的offset提交, 就不返回offset, 而是返回unstable offset的错误。
所以在kafka事务的consume-process-produce这种场景下, 如果producer在sendOffsetsToTransaction以后crash了, 那个group的consumer是无法继续消费的; 要等到事务确定是Abort或者Commit以后, 通过WriteTxnMarkersRequest把事务标记写入__consumer_offsets里面, consumer才能获取到offset继续消费。