Console Commands
Create a topic
/usr/local/kafka/kafka_2.11-2.2.0/bin/kafka-topics.sh --bootstrap-server kafka:9092 --create --topic topic01 --partitions 2 --replication-factor 1
Create a console consumer
/usr/local/kafka/kafka_2.11-2.2.0/bin/kafka-console-consumer.sh --bootstrap-server kafka:9092 --topic topic01 --group group1
Other useful options (note the leading double dashes):
--property print.key=true
--property print.value=true
--property key.separator=,
For example, to print keys and values separated by a comma:
/usr/local/kafka/kafka_2.11-2.2.0/bin/kafka-console-consumer.sh --bootstrap-server kafka:9092 --topic topic01 --group group1 --property print.key=true --property print.value=true --property key.separator=,
Create a console producer
/usr/local/kafka/kafka_2.11-2.2.0/bin/kafka-console-producer.sh --broker-list kafka:9092 --topic topic01
List the topics in the cluster
/usr/local/kafka/kafka_2.11-2.2.0/bin/kafka-topics.sh --bootstrap-server kafka:9092 --list
Describe a topic
/usr/local/kafka/kafka_2.11-2.2.0/bin/kafka-topics.sh --bootstrap-server kafka:9092 --describe --topic topic01
Increase a topic's partition count (partitions can only be increased, never decreased)
/usr/local/kafka/kafka_2.11-2.2.0/bin/kafka-topics.sh --bootstrap-server kafka:9092 --alter --topic topic01 --partitions 3
Delete a topic
/usr/local/kafka/kafka_2.11-2.2.0/bin/kafka-topics.sh --bootstrap-server kafka:9092 --delete --topic topic01
List consumer groups
/usr/local/kafka/kafka_2.11-2.2.0/bin/kafka-consumer-groups.sh --bootstrap-server kafka:9092 --list
Describe a consumer group
/usr/local/kafka/kafka_2.11-2.2.0/bin/kafka-consumer-groups.sh --bootstrap-server kafka:9092 --describe --group group1
Java Client Usage
Add the Maven dependencies
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka-clients</artifactId>
    <version>2.2.0</version>
</dependency>
<dependency>
    <groupId>org.apache.commons</groupId>
    <artifactId>commons-lang3</artifactId>
    <version>3.11</version>
</dependency>
Create a topic
import org.apache.kafka.clients.admin.*;
import org.apache.kafka.common.KafkaFuture;
import java.util.Arrays;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.ExecutionException;
public class KafkaDemo01 {
    public static void main(String[] args) throws ExecutionException, InterruptedException {
        Properties properties = new Properties();
        properties.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka:9092");
        AdminClient kafkaAdminClient = AdminClient.create(properties);
        // Create a topic with 10 partitions and a replication factor of 1
        CreateTopicsResult createTopicResult = kafkaAdminClient.createTopics(Arrays.asList(new NewTopic("topic02", 10, (short) 1)));
        createTopicResult.all().get();
        kafkaAdminClient.close();
    }
}
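NewTopic can also carry per-topic configs at creation time. A minimal sketch reusing the kafkaAdminClient above (the topic name "topic03" and the config values are illustrative, not from the original; add java.util.HashMap to the imports):
// Hypothetical example: create a compacted topic with explicit retention
Map<String, String> configs = new HashMap<>();
configs.put("cleanup.policy", "compact");
configs.put("retention.ms", "604800000"); // 7 days
NewTopic newTopic = new NewTopic("topic03", 3, (short) 1).configs(configs);
kafkaAdminClient.createTopics(Arrays.asList(newTopic)).all().get();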
Delete a topic
// Delete a topic
DeleteTopicsResult delTopic = kafkaAdminClient.deleteTopics(Arrays.asList("topic01"));
delTopic.all().get();
List topics
// List topics
ListTopicsResult listTopicsResult = kafkaAdminClient.listTopics();
Set<String> topicSet = listTopicsResult.names().get();
for (String topic : topicSet) {
    System.out.println(topic);
}
Describe a topic
DescribeTopicsResult topic02 = kafkaAdminClient.describeTopics(Arrays.asList("topic02"));
Map<String, TopicDescription> stringTopicDescriptionMap = topic02.all().get();
for (Map.Entry<String, TopicDescription> entry : stringTopicDescriptionMap.entrySet()) {
    System.out.println(entry.getKey() + "\t" + entry.getValue());
}
Get a topic's partition count
DescribeTopicsResult topic = kafkaAdminClient.describeTopics(Arrays.asList("topic02"));
Map<String, TopicDescription> map = topic.all().get();
for (Map.Entry<String, TopicDescription> entry : map.entrySet()) {
    TopicDescription value = entry.getValue();
    System.out.println(value.partitions().size());
}
Kafka Producer Demo
import org.apache.kafka.clients.admin.AdminClient;
import org.apache.kafka.clients.admin.AdminClientConfig;
import org.apache.kafka.clients.admin.DescribeTopicsResult;
import org.apache.kafka.clients.admin.TopicDescription;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;
import java.util.Arrays;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutionException;
public class KafkaProducerDemo {
    public static void main(String[] args) throws ExecutionException, InterruptedException {
        int partitions = findPartitions();
        System.out.println("partitions size == " + partitions);
        Properties properties = new Properties();
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka:9092");
        properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        KafkaProducer<String, String> kafkaProducer = new KafkaProducer<String, String>(properties);
        for (int i = 0; i < 1000; i++) {
            int currPartition = i % partitions;
            // With no partition and no key, records are spread across partitions round-robin.
            // With a key but no partition, the partition is derived from the key's hash.
            ProducerRecord<String, String> producerRecord = new ProducerRecord<String, String>("topic02", currPartition, "key" + i, "value" + i);
            kafkaProducer.send(producerRecord);
        }
        kafkaProducer.close();
    }
    private static int findPartitions() throws ExecutionException, InterruptedException {
        Properties properties = new Properties();
        properties.put(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka:9092");
        AdminClient kafkaAdminClient = AdminClient.create(properties);
        try {
            DescribeTopicsResult topic = kafkaAdminClient.describeTopics(Arrays.asList("topic02"));
            Map<String, TopicDescription> map = topic.all().get();
            for (Map.Entry<String, TopicDescription> entry : map.entrySet()) {
                TopicDescription value = entry.getValue();
                return value.partitions().size();
            }
            return 1;
        } finally {
            // The original never closed the admin client; close it to avoid leaking connections
            kafkaAdminClient.close();
        }
    }
}
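send() is asynchronous and returns immediately. To observe the delivery result, the plain send inside the loop can be replaced by a send with a Callback; a sketch (add org.apache.kafka.clients.producer.Callback and org.apache.kafka.clients.producer.RecordMetadata to the imports):
// Sketch: report where the record landed, or the error if the send failed
kafkaProducer.send(producerRecord, new Callback() {
    @Override
    public void onCompletion(RecordMetadata metadata, Exception exception) {
        if (exception != null) {
            exception.printStackTrace();
        } else {
            System.out.println("sent to " + metadata.topic() + "-" + metadata.partition() + "@" + metadata.offset());
        }
    }
});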
Kafka Producer Demo 2
Custom partitioner
import org.apache.kafka.clients.admin.AdminClientConfig;
import org.apache.kafka.clients.admin.DescribeTopicsResult;
import org.apache.kafka.clients.admin.KafkaAdminClient;
import org.apache.kafka.clients.admin.TopicDescription;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.serialization.StringSerializer;
import org.apache.kafka.common.utils.Utils;
import java.util.Arrays;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.atomic.AtomicInteger;
public class KafkaProducerDemo {
    public static void main(String[] args) throws ExecutionException, InterruptedException {
        Properties properties = new Properties();
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka:9092");
        properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        // Plug in the custom partitioning strategy
        properties.put(ProducerConfig.PARTITIONER_CLASS_CONFIG, MyPartitioner.class.getName());
        KafkaProducer<String, String> kafkaProducer = new KafkaProducer<String, String>(properties);
        for (int i = 0; i < 15; i++) {
            ProducerRecord<String, String> producerRecord = new ProducerRecord<String, String>("topic02", "value" + i);
            kafkaProducer.send(producerRecord);
        }
        kafkaProducer.close();
    }
    public static class MyPartitioner implements Partitioner {
        private static final AtomicInteger count = new AtomicInteger(0);
        @Override
        public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
            List<PartitionInfo> partitions = cluster.partitionsForTopic(topic);
            int numPartitions = partitions.size();
            if (keyBytes == null || keyBytes.length == 0) {
                // No key: distribute round-robin via an incrementing counter
                int andAdd = count.getAndAdd(1);
                return Utils.toPositive(andAdd) % numPartitions;
            } else {
                // Keyed record: hash the key, same as the default partitioner
                return Utils.toPositive(Utils.murmur2(keyBytes)) % numPartitions;
            }
        }
        @Override
        public void close() {
            System.out.println("close...");
        }
        @Override
        public void configure(Map<String, ?> map) {
            System.out.println("configure...");
        }
    }
}
Kafka Consumer Demo
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;
import java.time.Duration;
import java.util.Iterator;
import java.util.Properties;
import java.util.regex.Pattern;
public class KafkaConsumerDemo {
    public static void main(String[] args) {
        Properties properties = new Properties();
        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka:9092");
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        properties.put(ConsumerConfig.GROUP_ID_CONFIG, "group01");
        KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(properties);
        // Subscribe to every topic whose name starts with "topic"
        consumer.subscribe(Pattern.compile("^topic.*"));
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
            if (!records.isEmpty()) {
                Iterator<ConsumerRecord<String, String>> recordIt = records.iterator();
                while (recordIt.hasNext()) {
                    ConsumerRecord<String, String> record = recordIt.next();
                    String topic = record.topic();
                    int partition = record.partition();
                    long offset = record.offset();
                    String key = record.key();
                    String value = record.value();
                    long timestamp = record.timestamp();
                    System.out.println(topic + "\t" + partition + ":" + offset + " " + key + ":" + value + " " + timestamp);
                }
            }
        }
    }
}
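The poll loop above never exits. A common shutdown pattern, sketched here (not part of the original demo), is to call wakeup() from a shutdown hook; the next poll() then throws WakeupException and the loop can close the consumer cleanly:
// Sketch: break the poll loop on JVM shutdown (e.g. Ctrl+C)
Runtime.getRuntime().addShutdownHook(new Thread(consumer::wakeup));
try {
    while (true) {
        ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
        // ... process records as above ...
    }
} catch (org.apache.kafka.common.errors.WakeupException e) {
    // expected during shutdown, nothing to do
} finally {
    consumer.close();
}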
Kafka Consumer Demo 2
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.StringDeserializer;
import java.time.Duration;
import java.util.Arrays;
import java.util.Iterator;
import java.util.List;
import java.util.Properties;
import java.util.regex.Pattern;
public class KafkaConsumerDemo {
    public static void main(String[] args) {
        Properties properties = new Properties();
        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka:9092");
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(properties);
        // Manually assign partitions; this bypasses consumer-group management (no rebalancing)
        List<TopicPartition> partitions = Arrays.asList(new TopicPartition("topic01", 0));
        consumer.assign(partitions);
        // Reposition within the assigned partition
        // consumer.seekToBeginning(partitions);
        consumer.seek(new TopicPartition("topic01", 0), 125);
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
            if (!records.isEmpty()) {
                Iterator<ConsumerRecord<String, String>> recordIt = records.iterator();
                while (recordIt.hasNext()) {
                    ConsumerRecord<String, String> record = recordIt.next();
                    String topic = record.topic();
                    int partition = record.partition();
                    long offset = record.offset();
                    String key = record.key();
                    String value = record.value();
                    long timestamp = record.timestamp();
                    System.out.println(topic + "\t" + partition + ":" + offset + " " + key + ":" + value + " " + timestamp);
                }
            }
        }
    }
}
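Besides seeking to a fixed offset, the consumer can translate a timestamp into an offset with offsetsForTimes. A sketch (the one-hour window is illustrative; add java.util.HashMap and org.apache.kafka.clients.consumer.OffsetAndTimestamp to the imports):
// Seek to the first record whose timestamp is at most one hour old
TopicPartition tp = new TopicPartition("topic01", 0);
Map<TopicPartition, Long> query = new HashMap<>();
query.put(tp, System.currentTimeMillis() - 3600 * 1000L);
Map<TopicPartition, OffsetAndTimestamp> result = consumer.offsetsForTimes(query);
OffsetAndTimestamp oat = result.get(tp);
if (oat != null) {
    consumer.seek(tp, oat.offset());
}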
Custom Serialization
import org.apache.commons.lang3.SerializationUtils;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serializer;
import java.io.Serializable;
import java.util.Map;
// Serializer: any Serializable object -> byte[] via commons-lang3
public static class MySerializer implements Serializer {
    @Override
    public void configure(Map configs, boolean isKey) {
    }
    @Override
    public byte[] serialize(String topic, Object data) {
        return SerializationUtils.serialize((Serializable) data);
    }
    @Override
    public void close() {
    }
}
// Deserializer: byte[] -> object via commons-lang3
public static class MyDeserializer implements Deserializer {
    @Override
    public void configure(Map map, boolean b) {
    }
    @Override
    public Object deserialize(String s, byte[] bytes) {
        return SerializationUtils.deserialize(bytes);
    }
    @Override
    public void close() {
    }
}
Configuration
// Producer: use the custom value serializer
properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, MySerializer.class.getName());
// Consumer: use the custom value deserializer
properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, MyDeserializer.class.getName());
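Several of the following demos send a User object as the record value. SerializationUtils requires the value class to implement Serializable; the original does not show User, so here is a minimal assumed sketch:
// Hypothetical User class assumed by the demos; any Serializable POJO with these accessors works
public class User implements Serializable {
    private String id;
    private String name;
    public User(String id, String name) {
        this.id = id;
        this.name = name;
    }
    public String getId() { return id; }
    public String getName() { return name; }
    public void setName(String name) { this.name = name; }
    @Override
    public String toString() {
        return "User{id='" + id + "', name='" + name + "'}";
    }
}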
Producer Interceptors
Custom interceptor
import org.apache.kafka.clients.producer.ProducerInterceptor;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;
import java.util.Map;
public static class MyProducerInterceptor implements ProducerInterceptor {
    @Override
    public ProducerRecord onSend(ProducerRecord producerRecord) {
        // Rewrite the value before the record is sent
        Object value = producerRecord.value();
        if (value instanceof User) {
            User u = (User) value;
            u.setName(u.getName() + " -- sywh");
        }
        return new ProducerRecord(producerRecord.topic(), producerRecord.partition(), producerRecord.key(), value);
    }
    @Override
    public void onAcknowledgement(RecordMetadata recordMetadata, Exception e) {
        System.out.println("metaData:" + recordMetadata + ",exception:" + e);
    }
    @Override
    public void close() {
    }
    @Override
    public void configure(Map<String, ?> map) {
    }
}
Interceptor configuration
properties.put(ProducerConfig.INTERCEPTOR_CLASSES_CONFIG,MyProducerInterceptor.class.getName());
Consumer Offset Control
// Start from the latest offset when the group has no committed offset
properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");
// Commit consumed offsets once per second
properties.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, 1000);
// Commit consumed offsets automatically
properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true);
auto.offset.reset
- latest: if the group has no committed offset for the topic, start from the latest offset.
- earliest: if the group has no committed offset, start from the earliest available offset. Once the group has committed offsets, consumption resumes from the committed position regardless of this setting; earliest only determines where a brand-new group begins, which helps ensure messages are consumed at least once.
- none: if no committed offset is found for the group, throw an exception to the consumer.
Manual offset control example
properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
// Track the offsets to commit, per partition
Map<TopicPartition, OffsetAndMetadata> offsetMap = new HashMap<>();
// Save the offset to commit. Note the +1: the committed offset is the position of the
// next record to read, so without it the last record is consumed again on restart.
offsetMap.put(new TopicPartition(topic, partition), new OffsetAndMetadata(offset + 1));
// Commit the offsets asynchronously
consumer.commitAsync(offsetMap, new OffsetCommitCallback() {
    @Override
    public void onComplete(Map<TopicPartition, OffsetAndMetadata> map, Exception e) {
        System.out.println("offsets:" + map + ",exception:" + e);
    }
});
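Putting the pieces together, a sketch of a full poll loop with manual asynchronous commits (illustrative only; reuses the consumer setup from the earlier demos):
while (true) {
    ConsumerRecords<String, String> records = consumer.poll(Duration.ofMillis(100));
    Map<TopicPartition, OffsetAndMetadata> offsetMap = new HashMap<>();
    for (ConsumerRecord<String, String> record : records) {
        System.out.println(record.partition() + ":" + record.offset() + " " + record.value());
        // +1: commit the position of the next record to read
        offsetMap.put(new TopicPartition(record.topic(), record.partition()),
                new OffsetAndMetadata(record.offset() + 1));
    }
    if (!offsetMap.isEmpty()) {
        consumer.commitAsync(offsetMap, (offsets, e) -> {
            if (e != null) {
                System.out.println("commit failed: " + e);
            }
        });
    }
}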
Producer Retry Mechanism
// Acknowledgment mode
properties.put(ProducerConfig.ACKS_CONFIG, "all");
// Retry failed sends up to 3 times
properties.put(ProducerConfig.RETRIES_CONFIG, 3);
// Request timeout: 2000 ms
properties.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, 2000);
KafkaProducer<String, User> kafkaProducer = new KafkaProducer<String, User>(properties);
ProducerRecord<String, User> producerRecord = new ProducerRecord<String, User>("topicUser", "retry", new User("id111", "name111"));
kafkaProducer.send(producerRecord);
kafkaProducer.flush();
- acks=0: the producer does not wait for any acknowledgment from the server. The record is added to the socket buffer and considered sent. There is no guarantee the server received it, and the retries setting has no effect (the client generally will not learn of failures). The offset returned for each record is always -1.
- acks=1: the leader writes the record to its local log and responds without waiting for full acknowledgment from the followers. If the leader fails right after acknowledging but before the followers have replicated the record, the record is lost.
- acks=all: the leader waits for the full set of in-sync replicas to acknowledge the record. The record is not lost as long as at least one in-sync replica stays alive. This is the strongest guarantee, and is equivalent to acks=-1.
Producer Idempotent Writes
- When set to "true", the producer ensures that exactly one copy of each message is written to the stream.
- When "false", producer retries (after broker failures and the like) may write duplicates of the retried message to the stream.
Note that enabling idempotence requires max.in.flight.requests.per.connection to be at most 5 (set it to 1 to guarantee strictly ordered writes), retries to be greater than 0, and acks to be "all". If the user does not set these values explicitly, suitable values are chosen automatically; if incompatible values are set, a ConfigException is thrown.
Idempotent write example:
// Acknowledgment mode
properties.put(ProducerConfig.ACKS_CONFIG, "all");
// Retry up to 3 times (not counting the initial send)
properties.put(ProducerConfig.RETRIES_CONFIG, 3);
// Request timeout of 1 ms (an artificially small value, presumably to trigger retries in the demo)
properties.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, 1);
// Enable idempotent writes
properties.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, true);
// Guarantee strict ordering
properties.put(ProducerConfig.MAX_IN_FLIGHT_REQUESTS_PER_CONNECTION, 1);
KafkaProducer<String, User> kafkaProducer = new KafkaProducer<String, User>(properties);
ProducerRecord<String, User> producerRecord = new ProducerRecord<String, User>("topicUser", "retry", new User("id11122", "name11122"));
kafkaProducer.send(producerRecord);
kafkaProducer.flush();
kafkaProducer.close();
Producer Transactions
Producer code
Properties properties = new Properties();
properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafkaA:9092,kafkaB:9092,kafkaC:9092");
properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, MySerializer.class.getName());
// Set the transactional ID. (In production it should be stable across restarts so
// zombie producers can be fenced; a random UUID is only suitable for demos.)
properties.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, "transaction-id" + UUID.randomUUID().toString());
// Enable idempotent writes
properties.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, true);
// Acknowledgment mode
properties.put(ProducerConfig.ACKS_CONFIG, "all");
// Request timeout: 20 seconds
properties.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, 20000);
// Batch size in bytes
properties.put(ProducerConfig.BATCH_SIZE_CONFIG, 1024);
// If a batch has not reached 1024 bytes, send it anyway after this many milliseconds
properties.put(ProducerConfig.LINGER_MS_CONFIG, 5);
KafkaProducer<String, User> producer = new KafkaProducer<String, User>(properties);
// Initialize transactions
producer.initTransactions();
try {
    // Begin the transaction
    producer.beginTransaction();
    for (int i = 200; i < 300; i++) {
        if (i >= 208) {
            // Simulate a failure
            throw new RuntimeException("error");
        }
        ProducerRecord<String, User> producerRecord = new ProducerRecord<String, User>("topicUser", "key" + i, new User("id" + i, "name" + i));
        producer.send(producerRecord);
        producer.flush();
    }
    // Commit if nothing went wrong
    producer.commitTransaction();
} catch (Exception e) {
    System.out.println("exception:" + e);
    // Abort the transaction on failure
    producer.abortTransaction();
} finally {
    producer.close();
}
Consumer side
// Consumer isolation level: read_committed / read_uncommitted
properties.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_uncommitted");
Note: transactions require idempotent writes to be enabled and acks to be set to all.
Producer-Consumer Transactions
After consuming a message, the consumer immediately writes the transformed message to another Kafka topic; the whole read-process-write flow runs inside a single transaction.
import org.apache.commons.lang3.SerializationUtils;
import org.apache.kafka.clients.admin.AdminClientConfig;
import org.apache.kafka.clients.admin.DescribeTopicsResult;
import org.apache.kafka.clients.admin.KafkaAdminClient;
import org.apache.kafka.clients.admin.TopicDescription;
import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.clients.producer.*;
import org.apache.kafka.common.Cluster;
import org.apache.kafka.common.PartitionInfo;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.Serializer;
import org.apache.kafka.common.serialization.StringDeserializer;
import org.apache.kafka.common.serialization.StringSerializer;
import org.apache.kafka.common.utils.Utils;
import java.io.Serializable;
import java.time.Duration;
import java.util.*;
import java.util.concurrent.atomic.AtomicInteger;
public class KafkaProducerDemo {
    public static void main(String[] args) throws Exception {
        KafkaProducer<String, User> producer = buildProducer();
        KafkaConsumer<String, User> consumer = buildConsumer("group01");
        producer.initTransactions();
        consumer.subscribe(Arrays.asList("topicUser"));
        while (true) {
            ConsumerRecords<String, User> consumerRecords = consumer.poll(Duration.ofMillis(100));
            if (!consumerRecords.isEmpty()) {
                Map<TopicPartition, OffsetAndMetadata> offsetMap = new HashMap<>();
                producer.beginTransaction();
                try {
                    Iterator<ConsumerRecord<String, User>> consumerRecordIterator = consumerRecords.iterator();
                    while (consumerRecordIterator.hasNext()) {
                        ConsumerRecord<String, User> record = consumerRecordIterator.next();
                        // +1: commit the position of the next record to read, otherwise the
                        // last record would be consumed again after a restart
                        offsetMap.put(new TopicPartition(record.topic(), record.partition()), new OffsetAndMetadata(record.offset() + 1));
                        User user = record.value();
                        user.setName("u- " + user.getName());
                        ProducerRecord<String, User> newRecord = new ProducerRecord<String, User>("topicUser2", record.key(), user);
                        producer.send(newRecord);
                    }
                    // Commit the consumed offsets and the produced records atomically
                    producer.sendOffsetsToTransaction(offsetMap, "group01");
                    producer.commitTransaction();
                } catch (Exception e) {
                    System.out.println("exception: " + e.getMessage());
                    producer.abortTransaction();
                }
            }
        }
    }
    private static KafkaConsumer buildConsumer(String groupId) {
        Properties properties = new Properties();
        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafkaA:9092,kafkaB:9092,kafkaC:9092");
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, KafkaConsumerDemo.MyDeserializer.class.getName());
        properties.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed");
        properties.put(ConsumerConfig.GROUP_ID_CONFIG, groupId);
        properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        return new KafkaConsumer<String, User>(properties);
    }
    private static KafkaProducer buildProducer() {
        Properties properties = new Properties();
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "kafkaA:9092,kafkaB:9092,kafkaC:9092");
        properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, MySerializer.class.getName());
        // Set the transactional ID (a stable ID is preferable outside of demos, see above)
        properties.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, "transaction-id" + UUID.randomUUID().toString());
        // Enable idempotent writes
        properties.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, true);
        // Acknowledgment mode
        properties.put(ProducerConfig.ACKS_CONFIG, "all");
        // Request timeout: 20 seconds
        properties.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, 20000);
        // Batch size in bytes
        properties.put(ProducerConfig.BATCH_SIZE_CONFIG, 1024);
        // If a batch has not reached 1024 bytes, send it anyway after this many milliseconds
        properties.put(ProducerConfig.LINGER_MS_CONFIG, 5);
        return new KafkaProducer<String, User>(properties);
    }
    public static class MySerializer implements Serializer {
        @Override
        public void configure(Map configs, boolean isKey) {
        }
        @Override
        public byte[] serialize(String topic, Object data) {
            return SerializationUtils.serialize((Serializable) data);
        }
        @Override
        public void close() {
        }
    }
}
Kafka Leader-Follower Replication
- LEO (log end offset): the position after the last message in a partition; every replica of a partition has its own LEO.
- HW (high watermark): versions before 0.11 used the HW as the truncation reference. All data below the HW is considered replicated to every replica; the leader advances the HW once all replicas have caught up.
- ISR (in-sync replicas): the leader maintains the set of replicas that are in sync (the ISR list). A replica is removed from the ISR if it sends no fetch request within replica.lag.time.max.ms, or if it keeps fetching but cannot catch up with the leader within that window.
Using the HW as the truncation reference (before 0.11) can cause data loss or inconsistency between replicas:
Case 1: data loss
- Follower A fetches message M3 (HW 3) from the leader; the leader advances its HW to 3, but Follower A crashes before updating its own HW to 3.
- Follower A restarts, sees its HW is still 2, and truncates the data at HW 3.
- The leader then fails and Follower A is elected leader; the overall HW falls back to 2 and message M3 is lost.
Case 2: data inconsistency
- Follower A fetches message M3 (HW 3) from the leader; the leader advances its HW to 3, but Follower A crashes before updating its own HW to 3.
- Follower A restarts, sees its HW is still 2, and truncates the data at HW 3.
- The leader then fails and Follower A is elected leader; the overall HW falls back to 2, message M3 is gone, and a new message N3 is written at position 3.
- The old leader restarts with HW 3, so it skips truncation and fetches nothing, but the data it holds at position 3 is M3, not N3: the replicas have diverged.
Since 0.11, the leader epoch is used as the reference point for truncation during synchronization.
The leader epoch is bumped on every leadership change, and each epoch records the first offset written under it, for example:
epoch 0 starts at offset 0
epoch 1 starts at offset 200
epoch 2 starts at offset 400
epoch 3 starts at offset 500
- When a follower wants to fetch from the leader, it sends the leader epoch corresponding to its own LEO.
- If the follower sends epoch 2 / offset 405 and the leader replies that epoch 3 starts at 500, the follower fetches offsets 405-499 of epoch 2 and continues syncing from there.
- If the follower sends epoch 2 / offset 550 and the leader replies that epoch 3 starts at 500, the follower truncates its divergent data from offset 500 to 550 and resumes fetching from offset 500 in epoch 3.
Because each record is written together with its leader epoch in a single atomic step, unlike the two-step HW update (write the data, then update the HW), truncating by leader epoch avoids data loss and guarantees eventual consistency across broker changes. A toy sketch of the decision follows.
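This is an illustrative toy, not broker code: given the leader's epoch-to-start-offset table, a follower either continues from its LEO or truncates back to where the next epoch begins.
import java.util.Map;
import java.util.TreeMap;

public class LeaderEpochToyDemo {
    public static void main(String[] args) {
        // Leader's epoch table: epoch -> first offset written in that epoch
        TreeMap<Integer, Long> epochStart = new TreeMap<>();
        epochStart.put(0, 0L);
        epochStart.put(1, 200L);
        epochStart.put(2, 400L);
        epochStart.put(3, 500L);

        System.out.println(fetchFrom(epochStart, 2, 405)); // follower behind: fetch from 405
        System.out.println(fetchFrom(epochStart, 2, 550)); // follower diverged: truncate to 500
    }

    // Returns the offset the follower should fetch from, truncating back to the
    // start of the next epoch if its LEO ran past the end of its last-known epoch
    static long fetchFrom(TreeMap<Integer, Long> epochStart, int followerEpoch, long followerLeo) {
        Map.Entry<Integer, Long> next = epochStart.higherEntry(followerEpoch);
        long epochEnd = (next == null) ? Long.MAX_VALUE : next.getValue();
        return Math.min(followerLeo, epochEnd);
    }
}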
Kafka Eagle Installation
Download the package
Download address: http://download.kafka-eagle.org/
Extract the package
tar -zxvf kafka-eagle-web-2.0.3-bin.tar.gz -C /usr/local/kafka-eagle
Configure the environment variables
vi /etc/profile
export KE_HOME=/usr/local/kafka-eagle/kafka-eagle-web-2.0.3
export PATH=${JAVA_HOME}/bin:$PATH:$KE_HOME/bin
source /etc/profile
Edit the Kafka Eagle config file
vi /usr/local/kafka-eagle/kafka-eagle-web-2.0.3/conf/system-config.properties
kafka.eagle.zk.cluster.alias=cluster1
cluster1.zk.list=zookeeper:2181
# Where Kafka Eagle reads offsets from when computing consumer lag: kafka or zk
cluster1.kafka.eagle.offset.storage=kafka
#cluster2.kafka.eagle.offset.storage=zk
# Enable the metrics charts; requires JMX to be enabled on the Kafka brokers
kafka.eagle.metrics.charts=true
# Token required when modifying topics from the UI
kafka.eagle.topic.token=keadmin
# Authentication is not needed here, so leave the SASL settings commented out
#cluster2.kafka.eagle.sasl.enable=false
#cluster2.kafka.eagle.sasl.protocol=SASL_PLAINTEXT
#cluster2.kafka.eagle.sasl.mechanism=PLAIN
#cluster2.kafka.eagle.sasl.jaas.config=org.apache.kafka.common.security.plain.PlainLoginModule required username="kafka" password="kafka-eagle";
#cluster2.kafka.eagle.sasl.client.id=
#cluster2.kafka.eagle.blacklist.topics=
#cluster2.kafka.eagle.sasl.cgroup.enable=false
#cluster2.kafka.eagle.sasl.cgroup.topics=
#cluster3.kafka.eagle.ssl.enable=false
#cluster3.kafka.eagle.ssl.protocol=SSL
#cluster3.kafka.eagle.ssl.truststore.location=
#cluster3.kafka.eagle.ssl.truststore.password=
#cluster3.kafka.eagle.ssl.keystore.location=
#cluster3.kafka.eagle.ssl.keystore.password=
#cluster3.kafka.eagle.ssl.key.password=
#cluster3.kafka.eagle.blacklist.topics=
#cluster3.kafka.eagle.ssl.cgroup.enable=false
#cluster3.kafka.eagle.ssl.cgroup.topics=
# SQLite database not used; leave these commented out
#kafka.eagle.driver=org.sqlite.JDBC
#kafka.eagle.url=jdbc:sqlite:/hadoop/kafka-eagle/db/ke.db
#kafka.eagle.username=root
#kafka.eagle.password=www.kafka-eagle.org
# Use MySQL instead
kafka.eagle.driver=com.mysql.jdbc.Driver
kafka.eagle.url=jdbc:mysql://127.0.0.1:3306/ke?useUnicode=true&characterEncoding=UTF-8&zeroDateTimeBehavior=convertToNull
kafka.eagle.username=root
kafka.eagle.password=123456
Edit the Kafka startup script
vi /usr/local/kafka/kafka_2.11-2.2.0/bin/kafka-server-start.sh
if [ "x$KAFKA_HEAP_OPTS" = "x" ]; then
export KAFKA_HEAP_OPTS="-Xmx1G -Xms1G"
export JMX_PORT="7788" # add this line
fi
Restart Kafka after adding it.
Make ke.sh executable
chmod u+x /usr/local/kafka-eagle/kafka-eagle-web-2.0.3/bin/ke.sh
Start Kafka Eagle
./ke.sh start
Access the web UI (by default Kafka Eagle listens on port 8048, i.e. http://<host>:8048; the default account is admin / 123456).
Spring Boot Integration
Configure the dependencies
<parent>
    <groupId>org.springframework.boot</groupId>
    <artifactId>spring-boot-starter-parent</artifactId>
    <version>2.1.5.RELEASE</version>
</parent>
<dependencies>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter</artifactId>
    </dependency>
    <dependency>
        <groupId>org.springframework.kafka</groupId>
        <artifactId>spring-kafka</artifactId>
    </dependency>
    <dependency>
        <groupId>org.springframework.boot</groupId>
        <artifactId>spring-boot-starter-test</artifactId>
    </dependency>
</dependencies>
application.properties
########### Kafka cluster ###########
spring.kafka.bootstrap-servers=kafkaA:9092,kafkaB:9092,kafkaC:9092
########### Producer config ###########
spring.kafka.producer.retries=5
spring.kafka.producer.acks=all
spring.kafka.producer.batch-size=16384
spring.kafka.producer.buffer-memory=33554432
# Enable only for the transactional demos below; with a transaction-id-prefix set,
# plain (non-transactional) sends are rejected by KafkaTemplate
#spring.kafka.producer.transaction-id-prefix=transaction-id-
#spring.kafka.producer.properties.linger.ms=0
spring.kafka.producer.key-serializer=org.apache.kafka.common.serialization.StringSerializer
spring.kafka.producer.value-serializer=org.apache.kafka.common.serialization.StringSerializer
spring.kafka.producer.properties.enable.idempotence=true
########### Consumer config ###########
spring.kafka.consumer.properties.group.id=group_01
spring.kafka.consumer.auto-offset-reset=earliest
spring.kafka.consumer.enable-auto-commit=true
spring.kafka.consumer.auto-commit-interval=100
spring.kafka.consumer.properties.isolation.level=read_committed
spring.kafka.consumer.key-deserializer=org.apache.kafka.common.serialization.StringDeserializer
spring.kafka.consumer.value-deserializer=org.apache.kafka.common.serialization.StringDeserializer
# spring.kafka.listener.type=batch
Consumer demo
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.kafka.annotation.KafkaListener;
import org.springframework.kafka.annotation.KafkaListeners;
import org.springframework.messaging.handler.annotation.SendTo;
import java.io.IOException;
@SpringBootApplication
public class KafkaDemoApplication {
    public static void main(String[] args) throws IOException {
        SpringApplication.run(KafkaDemoApplication.class, args);
        System.in.read();
    }
    // Non-transactional mode
    @KafkaListeners(
            value = {
                    @KafkaListener(topics = "topic01", groupId = "g1")
            }
    )
    public void receive(ConsumerRecord<String, String> record) {
        System.out.println(record.key() + "\t" + record.value());
    }
    // Receive from topic01, process, and forward the result to topic02 (non-transactional)
    @KafkaListeners(
            value = {
                    @KafkaListener(topics = "topic01", groupId = "g2")
            }
    )
    @SendTo(value = "topic02")
    public String receiveAndSend(ConsumerRecord<String, String> record) {
        System.out.println(record.key() + "\t" + record.value());
        return record.value() + " -- sywh";
    }
}
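With auto-commit disabled, spring-kafka can hand the listener an Acknowledgment for manual offset commits. A sketch (assumes spring.kafka.consumer.enable-auto-commit=false and spring.kafka.listener.ack-mode=manual, neither of which is in the config above):
// Hypothetical manual-ack listener; ack.acknowledge() commits the offset
@KafkaListener(topics = "topic01", groupId = "g3")
public void receiveManualAck(ConsumerRecord<String, String> record,
                             org.springframework.kafka.support.Acknowledgment ack) {
    System.out.println(record.key() + "\t" + record.value());
    ack.acknowledge();
}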
Producer demo (non-transactional)
import com.demo.kafka.KafkaDemoApplication;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.test.context.junit4.SpringRunner;
@RunWith(SpringRunner.class)
@SpringBootTest(classes = KafkaDemoApplication.class)
public class TestDemo {
    @Autowired
    private KafkaTemplate kafkaTemplate;
    // Send a message outside a transaction
    @Test
    public void testSend1() {
        this.kafkaTemplate.send(new ProducerRecord<String, String>("topic01", "key-123", "value-456"));
    }
}
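KafkaTemplate.send returns a ListenableFuture, so the delivery result can be checked without blocking. A sketch (the key and value are illustrative; SendResult is org.springframework.kafka.support.SendResult and ListenableFuture is org.springframework.util.concurrent.ListenableFuture):
// Sketch: attach success/failure callbacks to a send
ListenableFuture<SendResult<String, String>> future =
        kafkaTemplate.send("topic01", "key-cb", "value-cb");
future.addCallback(
        result -> System.out.println("sent: " + result.getRecordMetadata()),
        ex -> System.out.println("send failed: " + ex.getMessage()));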
Producer demo (transactions, approach 1)
application.properties
# Add the transaction config
spring.kafka.producer.transaction-id-prefix=transaction-id-
Test case
import com.demo.kafka.KafkaDemoApplication;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.kafka.core.KafkaOperations;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.test.context.junit4.SpringRunner;
@RunWith(SpringRunner.class)
@SpringBootTest(classes = KafkaDemoApplication.class)
public class TestDemo {
    @Autowired
    private KafkaTemplate kafkaTemplate;
    // Send inside a transaction
    @Test
    public void testSend2() {
        this.kafkaTemplate.executeInTransaction(new KafkaOperations.OperationsCallback() {
            @Override
            public Object doInOperations(KafkaOperations kafkaOperations) {
                kafkaOperations.send(new ProducerRecord<String, String>("topic01", "tran-key-123", "tran-value-456"));
                return null;
            }
        });
    }
}
Producer demo (transactions, approach 2)
Interface
public interface IMessageSender {
    void sendMessage(String topic, String key, String value);
}
Implementation
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.stereotype.Service;
import org.springframework.transaction.annotation.Transactional;
@Service
@Transactional
public class MessageSenderImpl implements IMessageSender {
    @Autowired
    private KafkaTemplate kafkaTemplate;
    @Override
    public void sendMessage(String topic, String key, String value) {
        this.kafkaTemplate.send(topic, key, value);
    }
}
With spring.kafka.producer.transaction-id-prefix set, Spring Boot auto-configures a KafkaTransactionManager, so @Transactional wraps the send in a Kafka transaction.
Test case
import com.demo.kafka.IMessageSender;
import com.demo.kafka.KafkaDemoApplication;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.kafka.core.KafkaTemplate;
import org.springframework.test.context.junit4.SpringRunner;
@RunWith(SpringRunner.class)
@SpringBootTest(classes = KafkaDemoApplication.class)
public class TestDemo {
    @Autowired
    private KafkaTemplate kafkaTemplate;
    // Send inside a transaction via the service
    @Autowired
    private IMessageSender messageSender;
    @Test
    public void testMessageSender() {
        this.messageSender.sendMessage("topic01", "tran2-key-123", "tran2-key-456");
    }
}