查看有多少个topic
./kafka-topics.sh --bootstrap-server 10.10.5.252:9092 --list
创建topic
./kafka-topics.sh --bootstrap-server 10.10.5.252:9092 --topic windaka_One --create --partitions 1 --replication-factor 2
查看主题信息
./kafka-topics.sh --bootstrap-server 10.10.5.252:9092 --topic windaka_One --describe
增加分区
./kafka-topics.sh --bootstrap-server 10.10.5.250:9092 --topic windaka_One --alter --partitions 3
生产者消费者发送接收信息
./kafka-console-producer.sh --bootstrap-server 10.10.5.252:9092 --topic windaka_One
./kafka-console-consumer.sh --bootstrap-server 10.10.5.252:9092 --topic windaka_One
异步发送
<dependency> <groupId>org.apache.kafka</groupId> <artifactId>kafka-clients</artifactId> <version>3.0.0</version> </dependency>
public class CustomProducer{ public static void main(String[] args){ /*属性配置*/ Properties properties = new Properties(); /*链接属性设置*/ properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG,"10.10.5.252:9092,10.10.5.250:9092"); /*序列号熟悉设置*/ properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG,StringSerializer.class.getName()); properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,StringSerializer.class.getName()); /*创建kafka生产者*/ kafkaProducer<String,String> producer_0 = new kafkaProducer<String,String>(properties); /*发送数据*/ producer_0.send(new ProducerRecord<>("windaka_One(主题)“,"消息内容 Test-kafka")); /*关闭资源*/ producer_0.close(); } }
带回调函数的异步发送
/* Async send with a callback: onCompletion fires once the broker acks (or the send fails). */
producer_0.send(new ProducerRecord<>("windaka_One主题", "发送数据内容"), new Callback() {
    @Override
    public void onCompletion(RecordMetadata metadata, Exception e) {
        if (e == null) {
            /* Success: metadata carries where the record landed. */
            System.out.println("Topic:" + metadata.topic());
            System.out.println("分区:" + metadata.partition());
        } else {
            /* FIX: failures were silently swallowed before — surface them. */
            System.err.println("send failed: " + e);
        }
    }
});
同步发送
/* Synchronous send: get() blocks until the broker acks, turning the async send into a sync one. */
producer_0.send(new ProducerRecord<>("主题","发送数据内容")).get();
自定义分区器
实现一个类继承Partitioner
/**
 * Custom partitioner: records whose value contains "windaka" go to partition 0,
 * everything else to partition 1.
 * FIX: Partitioner extends Configurable and Closeable, so configure() and
 * close() must be implemented — they were missing (compile error).
 */
public class MyPartitioner implements Partitioner {

    @Override
    public int partition(String topic, Object key, byte[] keyBytes,
                         Object value, byte[] valueBytes, Cluster cluster) {
        /* Route by message content; only partitions 0 and 1 are used. */
        String msgValue = value.toString();
        int partition;
        if (msgValue.contains("windaka")) {
            partition = 0;
        } else {
            partition = 1;
        }
        return partition;
    }

    @Override
    public void close() {
        /* No resources to release. */
    }

    @Override
    public void configure(java.util.Map<String, ?> configs) {
        /* No extra configuration needed. */
    }
}
属性设置里面进行关联自定义分区设置
/* Wire in the custom partitioner.
 * The second argument is the fully-qualified class name of the custom Partitioner. */
properties.put(ProducerConfig.PARTITIONER_CLASS_CONFIG,"com.kafka.test.MyPartitioner");
生产者如何提高吞吐量
/* buffer.memory: total producer buffer; default is 32 MiB.
 * FIX: 33554422 was a typo — 32 MiB is 33554432 bytes. */
properties.put(ProducerConfig.BUFFER_MEMORY_CONFIG, 33554432);
/* batch.size: per-partition batch size; default 16 KiB.
 * FIX: the constant is BATCH_SIZE_CONFIG (BATCH_SIZE_FONGI does not exist). */
properties.put(ProducerConfig.BATCH_SIZE_CONFIG, 16384);
/* linger.ms: default 0; 5-100 ms is a practical range to let batches fill. */
properties.put(ProducerConfig.LINGER_MS_CONFIG, 5);
/* compression.type: snappy trades a little CPU for smaller batches on the wire. */
properties.put(ProducerConfig.COMPRESSION_TYPE_CONFIG, "snappy");
生产经验-数据可靠性
/* acks=1: leader-only acknowledgement (use "all"/"-1" for the strongest durability). */
properties.put(ProducerConfig.ACKS_CONFIG,"1");
/* retries: defaults to Integer.MAX_VALUE; capped to 3 here. */
properties.put(ProducerConfig.RETRIES_CONFIG,3);
数据去重-正好一次
/**
 * Exactly-once producer demo using Kafka transactions.
 * Transactions require acks=all (or -1) and a globally unique transactional.id.
 */
public class CustomProducer {
    public static void main(String[] args) {
        /* Connection, serialization and transaction settings. */
        Properties properties = new Properties();
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "10.10.5.252:9092,10.10.5.250:9092");
        properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        /* Transactions demand the strongest ack level. */
        properties.put(ProducerConfig.ACKS_CONFIG, "-1");
        /* Globally unique transactional id ("windaka01" is user-chosen). */
        properties.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, "windaka01");

        KafkaProducer<String, String> producer = new KafkaProducer<String, String>(properties);
        /* Register the transactional id with the coordinator, then open a transaction. */
        producer.initTransactions();
        producer.beginTransaction();
        try {
            producer.send(new ProducerRecord<>("topic", "发送数据内容"));
            /* Commit makes the record visible to read_committed consumers. */
            producer.commitTransaction();
        } catch (Exception e) {
            /* Any failure rolls the whole transaction back. */
            producer.abortTransaction();
        } finally {
            producer.close();
        }
    }
}
生产经验-数据有序
单分区内有序
多分区,分区与分区间无序
Zookeeper常用命令
bin/zkCli.sh
增加服役新节点,负载均衡
(1)创建一个要均衡的主题
(3)创建执行副本
vi increase-replication-factor.json
把第二步生成的均衡计划复制到执行副本里面。
退役旧节点
(6).停止退役节点上kafka服务即可。
kafka副本
Leader Partition自动平衡
Ps:
建议不使用再平衡。
增加副本
kafka文件存储机制
命令:
kafka-run-class.sh kafka.tools.DumpLogSegments --files ./0000000000000000.log
日志清理策略
kafka高效读写数据原理
消费者
消费一个主题数据
/**
 * Basic consumer: subscribes to windaka_One and prints each record.
 * FIX: Duration.ofseconds was a compile error — the method is Duration.ofSeconds.
 */
public class CustomConsumer {
    public static void main(String[] args) {
        /* Consumer configuration. */
        Properties pro = new Properties();
        /* Cluster connection. */
        pro.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "10.10.5.252:9092,10.10.5.250:9092");
        /* Key/value deserializers. */
        pro.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        pro.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        /* Consumer group id (mandatory when using subscribe()). */
        pro.put(ConsumerConfig.GROUP_ID_CONFIG, "test消费者group id");
        /* Create the consumer. */
        KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(pro);
        /* Subscribe to the topic list. */
        ArrayList<String> topic = new ArrayList<>();
        topic.add("windaka_One");
        consumer.subscribe(topic);
        /* Poll loop: Duration.ofSeconds(1) is the max time each poll may block. */
        while (true) {
            ConsumerRecords<String, String> conRecords = consumer.poll(Duration.ofSeconds(1));
            for (ConsumerRecord<String, String> record : conRecords) {
                System.out.println(record.topic());
                System.out.println(record.value());
                System.out.println(record.partition());
            }
        }
    }
}
消费一个主题下的某个分区数据
/**
 * Consumer pinned to a single partition via assign() (no group rebalancing).
 * FIX: StringDeserialize.class was a compile error — the class is StringDeserializer.
 */
public class CustomConsumer {
    public static void main(String[] args) {
        /* Consumer configuration. */
        Properties pro = new Properties();
        /* Cluster connection. */
        pro.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "10.10.5.252:9092,10.10.5.250:9092");
        /* Consumer group id. */
        pro.put(ConsumerConfig.GROUP_ID_CONFIG, "test");
        /* Key/value deserializers. */
        pro.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        pro.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        /* Create the consumer. */
        KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(pro);
        /* Manually assign topic windaka_One, partition 0 (bypasses group assignment). */
        ArrayList<TopicPartition> topicPart = new ArrayList<>();
        topicPart.add(new TopicPartition("windaka_One", 0));
        consumer.assign(topicPart);
        /* Poll loop. */
        while (true) {
            ConsumerRecords<String, String> conRecords = consumer.poll(Duration.ofSeconds(1));
            for (ConsumerRecord<String, String> record : conRecords) {
                System.out.println(record.topic());
                System.out.println(record.value());
                System.out.println(record.partition());
            }
        }
    }
}
消费者分区策略
-->Range
-->RoundRobin
/* Partition assignment strategy: RoundRobinAssignor spreads partitions
 * one-by-one across all consumers in the group. */
pro.put(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG,"org.apache.kafka.clients.consumer.RoundRobinAssignor");
-->Sticky
/* Partition assignment strategy: StickyAssignor balances evenly while keeping
 * existing assignments stable across rebalances. */
pro.put(ConsumerConfig.PARTITION_ASSIGNMENT_STRATEGY_CONFIG,"org.apache.kafka.clients.consumer.StickyAssignor");
消费者offset
0.9版本之前维护在zookeeper中;
0.9版本之后维护在kafka内置主题__consumer_offsets当中;
自动offset提交
/* enable.auto.commit: true is already the default. */
pro.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,true);
/* auto.commit.interval.ms: commit offsets every 1000 ms. */
pro.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG,1000);
手动提交offset
--同步提交
/**
 * Manual-commit consumer (synchronous): offsets advance only when commitSync()
 * is called after each processed batch.
 */
public class CustomConsumer {
    public static void main(String[] args) {
        Properties pro = new Properties();
        pro.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "10.10.5.252:9092,10.10.5.250:9092");
        pro.put(ConsumerConfig.GROUP_ID_CONFIG, "test");
        pro.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        pro.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        /* Disable auto-commit; offsets are committed manually below. */
        pro.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);

        KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(pro);
        ArrayList<String> topics = new ArrayList<>();
        topics.add("windaka_One");
        consumer.subscribe(topics);

        while (true) {
            ConsumerRecords<String, String> batch = consumer.poll(Duration.ofSeconds(1));
            for (ConsumerRecord<String, String> rec : batch) {
                System.out.println(rec.topic());
                System.out.println(rec.value());
            }
            /* Blocks until the broker acknowledges the offset commit. */
            consumer.commitSync();
        }
    }
}
--异步提交
/**
 * Manual-commit consumer (asynchronous): commitAsync() sends the offset commit
 * without blocking the poll loop.
 */
public class CustomConsumer {
    public static void main(String[] args) {
        Properties pro = new Properties();
        pro.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "10.10.5.252:9092,10.10.5.250:9092");
        pro.put(ConsumerConfig.GROUP_ID_CONFIG, "test");
        pro.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        pro.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        /* Disable auto-commit; offsets are committed manually below. */
        pro.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);

        KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(pro);
        ArrayList<String> topics = new ArrayList<>();
        topics.add("windaka_One");
        consumer.subscribe(topics);

        while (true) {
            ConsumerRecords<String, String> batch = consumer.poll(Duration.ofSeconds(1));
            for (ConsumerRecord<String, String> rec : batch) {
                System.out.println(rec.topic());
                System.out.println(rec.value());
            }
            /* Fire-and-forget offset commit; does not block the loop. */
            consumer.commitAsync();
        }
    }
}
消费者指定offset进行消费
/* Consume from an explicit offset: subscribe, wait for the assignment, then seek. */
KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(pro);
ArrayList<String> topic = new ArrayList<>();
topic.add("windaka_One");
consumer.subscribe(topic);
/* assignment() stays empty until the group rebalance completes, so poll until it fills. */
Set<TopicPartition> assignment = consumer.assignment();
while (assignment.isEmpty()) {
    consumer.poll(Duration.ofSeconds(1));
    assignment = consumer.assignment();
}
/* Start every assigned partition at offset 100. */
for (TopicPartition partition : assignment) {
    consumer.seek(partition, 100);
}
指定消费时间进行消费
/* Seek every assigned partition to the offset of "one day ago".
 * FIXES vs. the original: the timestamp math multiplied instead of subtracting
 * one day; the API is offsetsForTimes (not offsetForTimes); the type is
 * OffsetAndTimestamp (was misspelled); variable names had inconsistent casing
 * (topicPartHsm/topicPartHsM/topicPartOfHSM) that could not compile; and the
 * lookup result is null-checked, since offsetsForTimes returns null for a
 * partition with no record at/after the timestamp. */
Set<TopicPartition> assignment = consumer.assignment();
/* Poll until the group coordinator has actually assigned partitions. */
while (assignment.size() == 0) {
    consumer.poll(Duration.ofSeconds(1));
    assignment = consumer.assignment();
}
/* Map each partition to the target timestamp: now minus one day, in millis. */
HashMap<TopicPartition, Long> timestamps = new HashMap<>();
for (TopicPartition topicPartition : assignment) {
    timestamps.put(topicPartition, System.currentTimeMillis() - 1L * 24 * 3600 * 1000);
}
/* Translate timestamps into offsets. */
Map<TopicPartition, OffsetAndTimestamp> offsets = consumer.offsetsForTimes(timestamps);
for (TopicPartition topicPart : assignment) {
    OffsetAndTimestamp offsetTimeStamp = offsets.get(topicPart);
    if (offsetTimeStamp != null) {
        consumer.seek(topicPart, offsetTimeStamp.offset());
    }
}
/* ...then the normal poll loop consumes from the new positions. */
数据积压-消费者如何提高吞吐量
EFAK(eagle)
EFAK原名为eagle,是kafka的开源可视化监控系统。
Kafka-Kraft模式
2.8.0版本以后新模式,目的是为了去zookeeper化
(不安装启动zookeeper,一样使用kafka)
conf/kraft/server.properties
# KRaft mode node settings (conf/kraft/server.properties).
# A node may take one or both roles: broker stores data, controller runs elections.
process.roles=broker,controller
# Globally unique node id (replaces broker.id in KRaft mode).
node.id=252
# Controller quorum voters, format <node.id>@<host>:<port>.
controller.quorum.voters=252@10.10.5.252:9093,250@10.10.5.250:9093
# FIX: the listener protocol is PLAINTEXT (was misspelled PLANTEXT).
advertised.listeners=PLAINTEXT://10.10.5.252:9092
# Data/log directories.
log.dirs=/xxx/xxx/xxx
外部组件
kafka集成Flume
1.下载解压Flume
2. 删除不兼容包
rm -rf guava-11.0.2.jar
3.安装配置hadoop
4.vi log4j.properties
配置log目录
5.配置Flume
a1.sources.r1.filegroups.f1 = /var/log/xxx (监控的文件)
6. 启动Flume
外部组件
kafka集成Flink
集成方法不再记述
外部组件
kafka集成SpringBoot
/** REST endpoint: forwards the msg request parameter to Kafka topic "topic". */
@RestController
public class ProducerController {

    /* Auto-configured by Spring Boot from spring.kafka.* properties.
     * FIX: was the raw type KafkaTemplate; parameterized to match the
     * configured String serializers. */
    @Autowired
    KafkaTemplate<String, String> kafkaTest;

    @RequestMapping("/windaka")
    public String data(String msg) {
        kafkaTest.send("topic", msg);
        return "ok";
    }
}
application.properties spring.kafka.bootstrap-servers=10.10.5.252:9092,10.10.5.250:9092 spring.kafka.producer.key-serializer=org.apache.kafka.common.serialization.StringSerializer spring.kafka.producer.value-serializer=org.apache.kafka.common.serialization.StringSerializer
消费者
@Configuaration public class KafkaConsumer{ @KafkaListener(topics="windaka_One") public void consumerTopic(){ System.out.println("消费kafka消息:"+msg); } } @KafkaListener为监听对应主题的kafka消息注解
application.properties spring.kafka.consumer.group-id=windaka spring.kafka.consumer.key-deserializer=org.apache.kafka.common.serialization.StringDeserializer spring.kafka.consumer.value-deserializer=org.apache.kafka.common.serialization.StringDeserializer
外部组件
kafka集成Spark
Kafka生产调优
查看kafka堆内存使用率
命令:jmap -heap 2315(kafka进程号)
使用率19.45%,根据使用率,决定是否调整堆内存大小。
kafka压力测试
生产者压力测试
测试batch.size为4k,16k,32k时的吞吐量
测试linger.ms为0,50时的吞吐量
测试compression.type为snappy,zstd,gzip的吞吐量
调整缓存大小,测试
buffer.memory=67108864
消费者压力测试