Kafka Java API
If an interviewer asks whether you used the high-level or the low-level API, keep in mind that the definitions of "high-level" and "low-level" vary online, so first ask what they mean by those terms; some people, for example, classify manual offset commits as the low-level API.
First, add the dependencies:
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka_2.11</artifactId>
    <version>1.0.2</version>
</dependency>
<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka-clients</artifactId>
    <version>1.0.2</version>
</dependency>
Managing topics with AdminClient
package com.bigdata.demo;
import org.apache.kafka.clients.admin.*;
import org.apache.kafka.common.KafkaFuture;
import org.junit.After;
import org.junit.Before;
import org.junit.Test;
import java.util.Collections;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.concurrent.ExecutionException;
/**
 * Kafka's source code is written in Java + Scala.
 */
public class KafkaDemo01 {
private AdminClient client;
@Before
public void init(){
Properties prop = new Properties();
// prop.setProperty("bootstrap.servers","hadoop101:9092,hadoop102:9092,hadoop103:9092");
prop.setProperty(AdminClientConfig.BOOTSTRAP_SERVERS_CONFIG,"hadoop101:9092,hadoop102:9092,hadoop103:9092");
//create the AdminClient
client = AdminClient.create(prop);
}
//create a topic (name, partition count, replication factor)
@Test
public void test01() throws ExecutionException, InterruptedException {
NewTopic topic = new NewTopic("java",2,(short) 3);
CreateTopicsResult topicsResult = client.createTopics(Collections.singletonList(topic));
Map<String, KafkaFuture<Void>> map = topicsResult.values();
Set<Map.Entry<String, KafkaFuture<Void>>> entrySet = map.entrySet();
for (Map.Entry<String, KafkaFuture<Void>> entry : entrySet) {
System.out.println(entry.getKey()+","+entry.getValue().get());
}
}
//list all topics
@Test
public void test02() throws ExecutionException, InterruptedException {
ListTopicsResult listTopics = client.listTopics();
KafkaFuture<Set<String>> names = listTopics.names();
Set<String> topics = names.get();
for (String topic : topics) {
System.out.println(topic);
}
}
//list all topics, including internal ones such as __consumer_offsets
@Test
public void test03() throws ExecutionException, InterruptedException {
ListTopicsOptions options = new ListTopicsOptions();
options.listInternal(true);
ListTopicsResult listTopics = client.listTopics(options);
KafkaFuture<Set<String>> names = listTopics.names();
Set<String> topics = names.get();
for (String topic : topics) {
System.out.println(topic);
}
}
//describe a topic
@Test
public void test04() throws ExecutionException, InterruptedException {
DescribeTopicsResult result = client.describeTopics(Collections.singletonList("java"));
Map<String, KafkaFuture<TopicDescription>> values = result.values();
Set<Map.Entry<String, KafkaFuture<TopicDescription>>> set = values.entrySet();
for (Map.Entry<String, KafkaFuture<TopicDescription>> entry : set) {
System.out.println(entry.getKey()+","+entry.getValue().get().toString());
}
}
//delete a topic
@Test
public void test05() throws ExecutionException, InterruptedException {
DeleteTopicsResult result = client.deleteTopics(Collections.singletonList("java"));
result.all().get();//block until the delete request completes
}
@After
public void close(){
client.close();
}
}
Producing data
package com.bigdata.demo;
import org.apache.kafka.clients.producer.*;
import org.apache.kafka.common.serialization.StringSerializer;
import java.util.Properties;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.Future;
/**
 * Producing data
 * 1. Asynchronous fire-and-forget
 * 2. Asynchronous with a callback
 * 3. Synchronous (asynchronous send + blocking get)
 */
public class KafkaDemo02 {
public static void main(String[] args) throws ExecutionException, InterruptedException {
// m01();
// m02();
m03();
}
//1. Asynchronous fire-and-forget
public static void m01(){
Properties prop = new Properties();
prop.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG,"hadoop101:9092,hadoop102:9092,hadoop103:9092");
prop.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
prop.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
//create the producer client
KafkaProducer<String, String> producer = new KafkaProducer<>(prop);
ProducerRecord<String, String> record = new ProducerRecord<>("goods","java-demo01");
//send the record without waiting for an acknowledgement
producer.send(record);
//close() flushes buffered records before the JVM exits
producer.close();
}
//2. Asynchronous with a callback
public static void m02(){
Properties prop = new Properties();
prop.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG,"hadoop101:9092,hadoop102:9092,hadoop103:9092");
prop.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
prop.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
//create the producer client
KafkaProducer<String, String> producer = new KafkaProducer<>(prop);
for (int i = 0; i < 6; i++) {
ProducerRecord<String, String> record = new ProducerRecord<>("goods","java-demo00"+i);
//send the record; the callback runs once the broker responds
producer.send(record, new Callback() {
@Override
public void onCompletion(RecordMetadata metadata, Exception exception) {
if (exception != null) {
exception.printStackTrace();
return;
}
System.out.println(metadata.topic());
System.out.println(metadata.partition());
System.out.println(metadata.offset());
System.out.println("------------------------------");
}
});
}
producer.flush();
producer.close();
}
//3. Synchronous (asynchronous send + blocking get)
public static void m03() throws ExecutionException, InterruptedException {
Properties prop = new Properties();
prop.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG,"hadoop101:9092,hadoop102:9092,hadoop103:9092");
prop.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
prop.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
//create the producer client
KafkaProducer<String, String> producer = new KafkaProducer<>(prop);
ProducerRecord<String, String> record = new ProducerRecord<>("goods","java-demo01");
//send the record and block until the broker acknowledges it
Future<RecordMetadata> future = producer.send(record);
future.get();//block until the send completes
producer.close();
}
}
Implementing threads in Java
The following threading example illustrates how the producer's synchronous send (an asynchronous send followed by a blocking get) behaves.
package com.bigdata.demo;
import java.util.concurrent.Callable;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.FutureTask;
/**
 * Three ways to implement a thread in Java:
 * 1. Extend the Thread class
 * 2. Implement the Runnable interface
 * 3. Implement the Callable interface
 */
public class ThreadDemo {
static int sum01 = 0;
static int sum02 = 0;
public static void main(String[] args) throws ExecutionException, InterruptedException {
new ThreadDemo().m02();
}
public static void m01() throws InterruptedException {
Thread t1 = new Thread(new Runnable() {
@Override
public void run() {
for (int i = 0; i < 3; i++) {
sum01 += i;
}
}
});
Thread t2 = new Thread(new Runnable() {
@Override
public void run() {
for (int i = 0; i < 4; i++) {
sum02 += i;
}
}
});
t1.start();
t1.join();
t2.start();
t2.join();
System.out.println("main---------sum=== " + (sum01 + sum02));
}
public void m02() throws ExecutionException, InterruptedException {
FutureTask<Integer> task01 = new FutureTask<Integer>(new Call01());
FutureTask<Integer> task02 = new FutureTask<Integer>(new Call02());
Thread t1 = new Thread(task01);
Thread t2 = new Thread(task02);
t1.start();
t2.start();
//get() --- the main thread blocks here, waiting for the results returned by the child threads
System.out.println("main-------sum== " + (task01.get() + task02.get()));
}
class Call01 implements Callable<Integer>{
int s01 = 0;
@Override
public Integer call() throws Exception {
for (int i = 0; i < 3; i++) {
s01 += i;
}
return s01;
}
}
class Call02 implements Callable<Integer>{
int s02 = 0;
@Override
public Integer call() throws Exception {
for (int i = 0; i < 4; i++) {
s02 += i;
}
return s02;
}
}
}
//Output:
main-------sum== 9
Producer default partitioning rules
DefaultPartitioner applies the following rules:
- If the record specifies a partition, that partition is used and the partitioner is not invoked.
- If no partition is specified but a key is, the partition is chosen by hashing the key.
- If neither a partition nor a key is specified, partitions are assigned round-robin.
If a custom partitioner is configured, its rules are used instead of the defaults.
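A minimal sketch of the three cases (the class name PartitionRuleDemo is made up here; the goods topic and broker list reuse the earlier examples):
package com.bigdata.demo;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

import java.util.Properties;

public class PartitionRuleDemo {
    public static void main(String[] args) {
        Properties prop = new Properties();
        prop.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "hadoop101:9092,hadoop102:9092,hadoop103:9092");
        prop.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        prop.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        KafkaProducer<String, String> producer = new KafkaProducer<>(prop);

        // 1. Partition given explicitly: goes to partition 0, the partitioner is bypassed
        producer.send(new ProducerRecord<>("goods", 0, "k1", "explicit partition"));
        // 2. Key but no partition: DefaultPartitioner hashes the key to pick the partition
        producer.send(new ProducerRecord<>("goods", "k1", "partition chosen by key hash"));
        // 3. Neither partition nor key: partitions are used round-robin in this client version
        producer.send(new ProducerRecord<>("goods", "no key, round-robin"));

        producer.close();
    }
}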
Custom partitioner
- Implement the Partitioner interface
- Override the partition method
package com.bigdata.demo01;

import org.apache.kafka.clients.producer.Partitioner;
import org.apache.kafka.common.Cluster;

import java.util.Map;

//Route records to partitions 0/1/2 based on the first letter of the (String) key
public class MyPartitioner implements Partitioner {
    @Override
    public int partition(String topic, Object key, byte[] keyBytes, Object value, byte[] valueBytes, Cluster cluster) {
        String keyStr = (String) key;
        if (keyStr.startsWith("a")) {
            return 0;
        } else if (keyStr.startsWith("b")) {
            return 1;
        } else {
            return 2;
        }
    }

    @Override
    public void close() {
    }

    @Override
    public void configure(Map<String, ?> configs) {
    }
}
- Register the partitioner (and, optionally, interceptors) on the producer
package com.bigdata.demo01;

import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

import java.util.ArrayList;
import java.util.List;
import java.util.Properties;

/**
 * DefaultPartitioner rules:
 * 1. If a partition is specified, it is used and the partitioner is not invoked.
 * 2. If no partition is specified but a key is, the partition is chosen by hashing the key.
 * 3. If neither partition nor key is specified, partitions are used round-robin.
 *
 * If a custom partitioner is configured, its rules are used instead.
 */
public class ProducerDemo01 {
    public static void main(String[] args) {
        Properties prop = new Properties();
        prop.setProperty(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "hadoop101:9092,hadoop102:9092,hadoop103:9092");
        prop.setProperty(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        prop.setProperty(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class.getName());
        // register the custom partitioner
        prop.setProperty(ProducerConfig.PARTITIONER_CLASS_CONFIG, MyPartitioner.class.getName());
        // register the custom interceptor (interceptors are configured as a List, so use put)
        List<String> list = new ArrayList<>();
        list.add(MyInterceptor.class.getName());
        prop.put(ProducerConfig.INTERCEPTOR_CLASSES_CONFIG, list);
        // create the producer client
        KafkaProducer<String, String> producer = new KafkaProducer<>(prop);
        ProducerRecord<String, String> record;
        for (int i = 0; i < 6; i++) {
            if (i % 2 == 0) {
                record = new ProducerRecord<>("goods", "a" + i, "aaaaaa" + i);
            } else {
                record = new ProducerRecord<>("goods", "b" + i, "bbbbbb" + i);
            }
            // send the record
            producer.send(record);
        }
        producer.flush();
        producer.close();
    }
}
Custom interceptor
- Implement the ProducerInterceptor interface
- Override onSend and onAcknowledgement as needed
package com.bigdata.demo01;

import org.apache.kafka.clients.producer.ProducerInterceptor;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.clients.producer.RecordMetadata;

import java.util.Map;

public class MyInterceptor implements ProducerInterceptor<String, String> {
    /**
     * Intercepts the record when it is sent:
     * appends a timestamp to the value.
     */
    @Override
    public ProducerRecord<String, String> onSend(ProducerRecord<String, String> record) {
        String oldValue = record.value();
        String newValue = oldValue + System.currentTimeMillis();
        return new ProducerRecord<>(record.topic(), record.key(), newValue);
    }

    /**
     * Intercepts when the acknowledgement is received.
     */
    @Override
    public void onAcknowledgement(RecordMetadata metadata, Exception exception) {
        if (exception != null) {
            System.out.println("exception-------" + exception.getMessage());
        }
    }

    @Override
    public void close() {
    }

    @Override
    public void configure(Map<String, ?> configs) {
    }
}
- Add the configuration
Note: because more than one interceptor can be registered, the value is a List, so prop.setProperty() cannot be used; use put() instead.
List<String> list = new ArrayList<>();
list.add(MyInterceptor.class.getName());
prop.put(ProducerConfig.INTERCEPTOR_CLASSES_CONFIG, list);
Flume as a producer
a1.sources = r1
a1.channels = c1
a1.sinks = k1
# configure the source
a1.sources.r1.type = netcat
a1.sources.r1.bind = hadoop101
a1.sources.r1.port = 6666
# configure the channel
a1.channels.c1.type = memory
# configure the Kafka sink
a1.sinks.k1.type = org.apache.flume.sink.kafka.KafkaSink
a1.sinks.k1.kafka.topic = java
a1.sinks.k1.kafka.bootstrap.servers = hadoop101:9092,hadoop102:9092,hadoop103:9092
a1.sinks.k1.kafka.producer.acks = -1
# bind the source and sink to the channel
a1.sources.r1.channels = c1
a1.sinks.k1.channel = c1
Consumers pulling data
package com.bigdata.demo;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;
import java.util.Collections;
import java.util.Iterator;
import java.util.Properties;
/**
 * Consumer
 * enable.auto.commit = true        commit offsets automatically
 * auto.commit.interval.ms = 5000   interval between automatic offset commits
 *
 * Normally the offset should only be committed after the data has actually been consumed.
 * For example, if records fetched from Kafka are written into MySQL, a record only counts
 * as consumed once the insert succeeds.
 *
 * So in real development offsets must be committed manually.
 */
public class KafkaDemo03 {
public static void main(String[] args) {
Properties prop = new Properties();
prop.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"hadoop101:9092,hadoop102:9092,hadoop103:9092");
prop.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
prop.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
prop.setProperty(ConsumerConfig.GROUP_ID_CONFIG,"g01");
//create the consumer client
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(prop);
//subscribe to the topic
consumer.subscribe(Collections.singletonList("goods"));
while(true){
//poll for data
ConsumerRecords<String, String> records = consumer.poll(3000);
Iterator<ConsumerRecord<String,String>> it = records.iterator();
while(it.hasNext()){
ConsumerRecord<String, String> record = it.next();
System.out.println(record.key() +"," + record.value());
}
}
}
}
package com.bigdata.demo;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;
import java.util.Collections;
import java.util.Iterator;
import java.util.Properties;
/**
 * Consumer
 * enable.auto.commit = true        commit offsets automatically
 * auto.commit.interval.ms = 5000   interval between automatic offset commits
 *
 * Normally the offset should only be committed after the data has actually been consumed.
 * For example, if records fetched from Kafka are written into MySQL, a record only counts
 * as consumed once the insert succeeds.
 *
 * So in real development offsets must be committed manually.
 */
public class KafkaDemo04 {
public static void main(String[] args) {
Properties prop = new Properties();
prop.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"hadoop101:9092,hadoop102:9092,hadoop103:9092");
prop.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
prop.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
prop.setProperty(ConsumerConfig.GROUP_ID_CONFIG,"g01");
//disable automatic offset commits
prop.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,"false");
//create the consumer client
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(prop);
//subscribe to the topic
consumer.subscribe(Collections.singletonList("goods"));
// try {
while(true){
//poll for data
ConsumerRecords<String,String> records = consumer.poll(3000);
for (ConsumerRecord<String, String> record : records) {
System.out.println(record.key() +"," + record.value());
//jdbc: e.g. write the record to MySQL here before committing
}
//commit the offsets
/**
 * commitSync: synchronous commit
 * Pros:
 * 1. the consumer client waits for the commit response
 * 2. a failed commit can be retried
 * Cons: lower throughput
 *
 * commitAsync: asynchronous commit
 * Pros: higher throughput
 * Cons: the outcome of the commit is unknown (no retry)
 */
// consumer.commitSync();//synchronous commit
consumer.commitAsync();//asynchronous commit
}
// }
//A final synchronous commit guards against the last asynchronous commit having failed:
// finally {
// consumer.commitSync();//synchronous commit
// }
}
}
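A sketch of the pattern the commented-out try/finally above is hinting at: commit asynchronously on every poll for throughput, then make one final synchronous commit on shutdown so the last offsets are not lost. The class name KafkaDemo04WithFinally is made up here; topic, group, and broker list reuse the earlier examples.
package com.bigdata.demo;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.util.Collections;
import java.util.Properties;

public class KafkaDemo04WithFinally {
    public static void main(String[] args) {
        Properties prop = new Properties();
        prop.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "hadoop101:9092,hadoop102:9092,hadoop103:9092");
        prop.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        prop.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        prop.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "g01");
        prop.setProperty(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");

        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(prop);
        consumer.subscribe(Collections.singletonList("goods"));
        try {
            while (true) {
                ConsumerRecords<String, String> records = consumer.poll(3000);
                for (ConsumerRecord<String, String> record : records) {
                    // process the record (e.g. write it to MySQL) before committing
                    System.out.println(record.key() + "," + record.value());
                }
                // non-blocking commit on every poll: fast, but failures are not retried
                consumer.commitAsync();
            }
        } finally {
            try {
                // one last blocking commit so the latest offsets are not lost
                consumer.commitSync();
            } finally {
                consumer.close();
            }
        }
    }
}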
Controlling the starting offset (auto.offset.reset)
package com.bigdata.demo;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;
import java.util.Collections;
import java.util.Iterator;
import java.util.Properties;
/**
 * Consumer
 * enable.auto.commit = true        commit offsets automatically
 * auto.commit.interval.ms = 5000   interval between automatic offset commits
 * auto.offset.reset:
 *   latest (default): if the group has no committed offset, start from the latest data; otherwise resume from the committed offset
 *   earliest: if the group has no committed offset, start from the beginning; otherwise resume from the committed offset
 */
public class KafkaDemo05 {
public static void main(String[] args) {
Properties prop = new Properties();
prop.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"hadoop101:9092,hadoop102:9092,hadoop103:9092");
prop.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
prop.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
prop.setProperty(ConsumerConfig.GROUP_ID_CONFIG,"g02");
prop.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest");
//create the consumer client
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(prop);
//subscribe to the topic
consumer.subscribe(Collections.singletonList("goods"));
while(true){
//poll for data
ConsumerRecords<String, String> records = consumer.poll(3000);
Iterator<ConsumerRecord<String,String>> it = records.iterator();
while(it.hasNext()){
ConsumerRecord<String, String> record = it.next();
System.out.println(record.key() +"," + record.value());
}
}
}
}
Consuming from specified partitions and offsets
package com.bigdata.demo01;
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.StringDeserializer;
import java.util.Arrays;
import java.util.Properties;
/**
 * Consume from specified partitions
 * and seek to a specified offset
 */
public class KafkaTest01 {
public static void main(String[] args) {
Properties prop = new Properties();
prop.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"hadoop101:9092,hadoop102:9092,hadoop103:9092");
prop.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
prop.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
prop.setProperty(ConsumerConfig.GROUP_ID_CONFIG,"g02");
prop.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest");
//create the consumer client
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(prop);
//assign a specific partition to consume from
TopicPartition tp = new TopicPartition("goods",1);
consumer.assign(Arrays.asList(tp));
//seek to a specific offset within that partition
consumer.seek(tp,10);
while(true){
//poll
//timeout: the time, in milliseconds, to wait in the poll if no data is available in the buffer.
//If 0, it returns immediately with whatever records are currently in the buffer (or an empty set otherwise). It must not be negative.
ConsumerRecords<String,String> records = consumer.poll(3000);
for (ConsumerRecord<String, String> record : records) {
System.out.println("partition===" + record.partition());
System.out.println("offset===" + record.offset());
System.out.println("value===" + record.value());
System.out.println("-------------------------------");
}
}
}
}
Synchronously committing specific per-partition offsets
package com.bigdata.demo01;
import org.apache.kafka.clients.consumer.*;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.StringDeserializer;
import java.util.*;
public class KafkaTest02 {
public static void main(String[] args) {
Properties prop = new Properties();
prop.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG,"hadoop101:9092,hadoop102:9092,hadoop103:9092");
prop.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
prop.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
prop.setProperty(ConsumerConfig.GROUP_ID_CONFIG,"g02");
prop.setProperty(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest");
//create the consumer client
KafkaConsumer<String, String> consumer = new KafkaConsumer<>(prop);
consumer.subscribe(Arrays.asList("goods"));
while(true){
ConsumerRecords<String,String> records = consumer.poll(3000);
Set<TopicPartition> set = records.partitions();
HashMap<TopicPartition,OffsetAndMetadata> map = new HashMap<>();
for (TopicPartition tp : set) {
List<ConsumerRecord<String, String>> list = records.records(tp);
//the offset to commit is the offset of the last processed record in this partition + 1
long offset = list.get(list.size()-1).offset() + 1;
OffsetAndMetadata metadata = new OffsetAndMetadata(offset);
map.put(tp,metadata);
}
//synchronously commit the exact offsets, per partition
consumer.commitSync(map);
}
}
}
The subscribe method
- Subscribes to topics
- Rebalances automatically when the partition assignment changes; a ConsumerRebalanceListener callback can observe the rebalance (see the sketch after the signature below)
public void subscribe(Collection<String> topics, ConsumerRebalanceListener callback);
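A minimal sketch of subscribing with a ConsumerRebalanceListener (the class name RebalanceListenerDemo and the listener bodies are only illustrative; topic, group, and brokers reuse the earlier examples):
package com.bigdata.demo01;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRebalanceListener;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.TopicPartition;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.util.Collection;
import java.util.Collections;
import java.util.Properties;

public class RebalanceListenerDemo {
    public static void main(String[] args) {
        Properties prop = new Properties();
        prop.setProperty(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "hadoop101:9092,hadoop102:9092,hadoop103:9092");
        prop.setProperty(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        prop.setProperty(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class.getName());
        prop.setProperty(ConsumerConfig.GROUP_ID_CONFIG, "g01");

        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(prop);
        consumer.subscribe(Collections.singletonList("goods"), new ConsumerRebalanceListener() {
            @Override
            public void onPartitionsRevoked(Collection<TopicPartition> partitions) {
                // called before partitions are taken away, e.g. commit offsets here
                System.out.println("revoked: " + partitions);
            }

            @Override
            public void onPartitionsAssigned(Collection<TopicPartition> partitions) {
                // called after new partitions are assigned, e.g. seek to saved offsets here
                System.out.println("assigned: " + partitions);
            }
        });

        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(3000);
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record.key() + "," + record.value());
            }
        }
    }
}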
The assign method
- Consumes from explicitly specified partitions
- Does not rebalance automatically when partitions change