Setup
Prerequisites
- Clocks synchronized across all nodes of the distributed cluster
- JDK 1.8 or later
- A running, healthy ZooKeeper ensemble
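For clock synchronization, a one-shot NTP sync on each node is usually enough (a sketch; substitute your own NTP server for pool.ntp.org):
[root@Kafka0* ~]# ntpdate pool.ntp.org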
Installation
Extract the archive on every node (the Kafka0* prompt below stands for each of Kafka01/02/03):
[root@Kafka0* ~]# tar -zxf kafka_2.11-2.2.0.tgz -C /usr
Configuration
Edit Kafka's core configuration file, server.properties:
[root@Kafka01 kafka_2.11-2.2.0]# vi config/server.properties
# broker.id must be unique: use 0, 1, and 2 on Kafka01, Kafka02, and Kafka03 respectively
broker.id=0
# each broker advertises its own hostname (Kafka01, Kafka02, or Kafka03)
listeners=PLAINTEXT://Kafka01:9092
log.dirs=/usr/kafka_2.11-2.2.0/data
zookeeper.connect=Kafka01:2181,Kafka02:2181,Kafka03:2181
Starting and stopping the service
[root@Kafka0* kafka_2.11-2.2.0]# bin/kafka-server-start.sh config/server.properties
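The start script also accepts a -daemon flag to run the broker in the background instead of the foreground:
[root@Kafka0* kafka_2.11-2.2.0]# bin/kafka-server-start.sh -daemon config/server.properties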
[root@Kafka0* kafka_2.11-2.2.0]# bin/kafka-server-stop.sh
Basic usage
Shell
- Create a topic
[root@Kafka01 kafka_2.11-2.2.0]# bin/kafka-topics.sh --bootstrap-server Kafka01:9092,Kafka02:9092,Kafka03:9092 --topic t1 --partitions 3 --replication-factor 3 --create
- List topics
[root@Kafka01 kafka_2.11-2.2.0]# bin/kafka-topics.sh --bootstrap-server Kafka01:9092,Kafka02:9092,Kafka03:9092 --list
- Delete a topic
[root@Kafka02 kafka_2.11-2.2.0]# bin/kafka-topics.sh --bootstrap-server Kafka01:9092,Kafka02:9092,Kafka03:9092 --delete --topic t2
- Describe a topic
[root@Kafka02 kafka_2.11-2.2.0]# bin/kafka-topics.sh --bootstrap-server Kafka01:9092,Kafka02:9092,Kafka03:9092 --describe --topic t1
- Alter a topic (the partition count can only be increased, never decreased)
[root@Kafka02 kafka_2.11-2.2.0]# bin/kafka-topics.sh --bootstrap-server Kafka01:9092,Kafka02:9092,Kafka03:9092 --alter --topic t1 --partitions 5
- Produce (console producer)
[root@Kafka01 kafka_2.11-2.2.0]# bin/kafka-console-producer.sh --broker-list Kafka01:9092,Kafka02:9092,Kafka03:9092 --topic t1
- Consume (console consumer)
[root@Kafka01 kafka_2.11-2.2.0]# bin/kafka-console-consumer.sh --topic t1 --bootstrap-server Kafka01:9092,Kafka02:9092,Kafka03:9092
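By default the console consumer only shows records produced after it starts; to replay a topic from the earliest offset, add --from-beginning:
[root@Kafka01 kafka_2.11-2.2.0]# bin/kafka-console-consumer.sh --topic t1 --bootstrap-server Kafka01:9092,Kafka02:9092,Kafka03:9092 --from-beginning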
Java
Dependency
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>2.2.0</version>
</dependency>
Producer
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.ProducerConfig;
import org.apache.kafka.clients.producer.ProducerRecord;
import org.apache.kafka.common.serialization.StringSerializer;

import java.util.Properties;
import java.util.UUID;

public class ProducerDemo {
    public static void main(String[] args) {
        //1. Prepare the Kafka producer configuration
        Properties properties = new Properties();
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "Kafka01:9092,Kafka02:9092,Kafka03:9092");
        // String serializers (Object ---> byte[])
        properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        //2. Create the Kafka producer
        KafkaProducer<String, String> producer = new KafkaProducer<>(properties);
        //3. Build a record and publish it
        ProducerRecord<String, String> record = new ProducerRecord<>("t2", UUID.randomUUID().toString(), "Hello Kafka");
        producer.send(record);
        //4. Release resources
        producer.flush();
        producer.close();
    }
}
Consumer
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
import org.apache.kafka.common.serialization.StringDeserializer;

import java.time.Duration;
import java.util.Arrays;
import java.util.Properties;

public class ConsumerDemo {
    public static void main(String[] args) {
        //1. Specify the Kafka consumer configuration
        Properties properties = new Properties();
        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "Kafka01:9092,Kafka02:9092,Kafka03:9092");
        // String deserializers (byte[] ---> Object)
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        // a consumer group must be specified
        properties.put(ConsumerConfig.GROUP_ID_CONFIG, "group1");
        //2. Create the Kafka consumer
        KafkaConsumer<String, String> consumer = new KafkaConsumer<>(properties);
        //3. Subscribe to the topic
        consumer.subscribe(Arrays.asList("t2"));
        //4. Poll for newly produced records
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(10));
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record.key() + "\t" + record.value() + "\t"
                        + record.topic() + "\t" + record.offset()
                        + "\t" + record.timestamp() + "\t" + record.partition());
            }
        }
    }
}
Advanced topics
Offset control
- Offset reset policy:
properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
- Auto-commit policy:
// by default, the consumed offset is committed automatically
properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, true);
// by default, the position is committed every 5 seconds
properties.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, 5000);
- Usually the position should be committed manually instead (see the sketch after this list):
// disable automatic offset commits
properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
// commit the consumed position manually
consumer.commitSync();
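A minimal sketch of the manual-commit pattern; process(...) stands in for hypothetical business logic:
while (true) {
    ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(10));
    for (ConsumerRecord<String, String> record : records) {
        process(record); // hypothetical business logic
    }
    // commit only after the whole batch succeeded; if the consumer crashes
    // before this line, the batch is re-delivered (at-least-once semantics)
    consumer.commitSync();
}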
Consumption modes
Subscribe
// subscribe to (consume) all partitions of the topic
consumer.subscribe(Arrays.asList("t3"));
Assign specific partitions
// consume only the given partitions of the topic
consumer.assign(Arrays.asList(new TopicPartition("t3", 0)));
Resetting the consumed position
consumer.assign(Arrays.asList(new TopicPartition("t3", 0)));
// reset the position to offset 1
consumer.seek(new TopicPartition("t3", 0), 1);
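Besides seeking to an absolute offset, the consumer can also jump to either end of its assigned partitions:
// jump to the earliest / latest available offset
consumer.seekToBeginning(Arrays.asList(new TopicPartition("t3", 0)));
consumer.seekToEnd(Arrays.asList(new TopicPartition("t3", 0)));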
Custom objects
Dependency
<dependency>
<groupId>commons-lang</groupId>
<artifactId>commons-lang</artifactId>
<version>2.4</version>
</dependency>
Codec class
import org.apache.commons.lang.SerializationUtils;
import org.apache.kafka.common.serialization.Deserializer;
import org.apache.kafka.common.serialization.Serializer;

import java.io.Serializable;
import java.util.Map;

/**
 * Codec (serializer + deserializer) for arbitrary Serializable objects
 */
public class ObjectCodec implements Serializer<Object>, Deserializer<Object> {

    @Override
    public void configure(Map<String, ?> configs, boolean isKey) {
    }

    /**
     * Object ---> byte[]
     */
    @Override
    public byte[] serialize(String topic, Object data) {
        return SerializationUtils.serialize((Serializable) data);
    }

    /**
     * byte[] ---> Object
     */
    @Override
    public Object deserialize(String topic, byte[] data) {
        return SerializationUtils.deserialize(data);
    }

    @Override
    public void close() {
    }
}
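The test below assumes a User value class. Because ObjectCodec relies on Java serialization, the class must implement Serializable; a minimal sketch (the field names are assumptions inferred from the constructor calls):
import java.io.Serializable;
import java.util.Date;

public class User implements Serializable {
    private Integer id;
    private String name;
    private Date birthday;

    public User(Integer id, String name, Date birthday) {
        this.id = id;
        this.name = name;
        this.birthday = birthday;
    }

    @Override
    public String toString() {
        return "User{id=" + id + ", name='" + name + "', birthday=" + birthday + "}";
    }
}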
Test
- Producer
public class ProducerDemo {
    public static void main(String[] args) {
        //1. Prepare the Kafka producer configuration
        Properties properties = new Properties();
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "Kafka01:9092,Kafka02:9092,Kafka03:9092");
        // key: String serializer; value: the custom ObjectCodec
        properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, ObjectCodec.class);
        //2. Create the Kafka producer
        KafkaProducer<String, User> producer = new KafkaProducer<>(properties);
        //3. Build records and publish them
        for (int i = 1; i < 10; i++) {
            // non-null key: the partition is chosen by key hash
            // (with a null key, the producer falls back to round-robin partitioning)
            ProducerRecord<String, User> record = new ProducerRecord<>("t4", UUID.randomUUID().toString(),
                    new User(i, "zs:" + i, new Date()));
            producer.send(record);
        }
        //4. Release resources
        producer.flush();
        producer.close();
    }
}
- Consumer
public class ConsumerDemo {
    public static void main(String[] args) throws InterruptedException {
        //1. Specify the Kafka consumer configuration
        Properties properties = new Properties();
        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "Kafka01:9092,Kafka02:9092,Kafka03:9092");
        // deserializers (byte[] ---> Object)
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ObjectCodec.class);
        // note: this setting changes the default offset reset behavior
        properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        // disable automatic offset commits
        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
        //properties.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG, 5000);
        // a consumer group must be specified
        properties.put(ConsumerConfig.GROUP_ID_CONFIG, "group1");
        //2. Create the Kafka consumer
        KafkaConsumer<String, User> consumer = new KafkaConsumer<>(properties);
        //3. Subscribe to the topic
        consumer.subscribe(Arrays.asList("t4"));
        //4. Poll for newly produced records
        while (true) {
            ConsumerRecords<String, User> records = consumer.poll(Duration.ofSeconds(10));
            for (ConsumerRecord<String, User> record : records) {
                User user = record.value();
                System.out.println(user);
            }
            // commit the consumed position manually
            consumer.commitSync();
        }
    }
}
Producer batching
Usage
A batch is flushed once it reaches batch.size bytes, or once linger.ms milliseconds have elapsed, whichever happens first:
properties.put(ProducerConfig.BATCH_SIZE_CONFIG, 16384);
properties.put(ProducerConfig.LINGER_MS_CONFIG, 2000);
Kafka and Spring Boot integration
Dependencies
<dependencies>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-web</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.kafka</groupId>
<artifactId>spring-kafka</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.kafka</groupId>
<artifactId>spring-kafka-test</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
Configuration file (application.properties)
spring.kafka.bootstrap-servers=Kafka01:9092,Kafka02:9092,Kafka03:9092
spring.kafka.consumer.group-id=g1
spring.kafka.producer.key-serializer=org.apache.kafka.common.serialization.StringSerializer
spring.kafka.producer.value-serializer=org.apache.kafka.common.serialization.StringSerializer
spring.kafka.consumer.key-deserializer=org.apache.kafka.common.serialization.StringDeserializer
spring.kafka.consumer.value-deserializer=org.apache.kafka.common.serialization.StringDeserializer
Producer
@Component
public class KafkaProducerDemo {
@Autowired
private KafkaTemplate<String,String> template;
// scheduled task: send a message periodically
// cron fields: second minute hour day-of-month month day-of-week (year omitted)
@Scheduled(cron = "0/10 * * * * ?")
public void send(){
template.send("t5", UUID.randomUUID().toString(),"Hello Kafka");
//System.out.println(new Date());
}
}
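Note that @Scheduled only fires when scheduling is enabled on the application. A minimal sketch of the bootstrap class (the class name is illustrative):
@SpringBootApplication
@EnableScheduling
public class KafkaApplication {
    public static void main(String[] args) {
        SpringApplication.run(KafkaApplication.class, args);
    }
}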
Consumer
@Component
public class KafkaConsumerDemo {
@KafkaListener(topics = "t5")
public void receive(ConsumerRecord<String, String> record) {
System.out.println(record.key() + "\t" + record.value());
}
}
Producer idempotence
properties.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, true); // enable idempotence support
properties.put(ProducerConfig.ACKS_CONFIG, "all"); // acks: -1/all = all in-sync replicas, 1 = leader only, 0 = no acknowledgement
properties.put(ProducerConfig.RETRIES_CONFIG, 5); // retry count
properties.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, 3000); // request timeout
Kafka transactions
Producer
public class ProducerDemo {
    public static void main(String[] args) {
        //1. Prepare the Kafka producer configuration
        Properties properties = new Properties();
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "Kafka01:9092,Kafka02:9092,Kafka03:9092");
        // String serializers (Object ---> byte[])
        properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        // transactional ID; must be unique. A random UUID works for a demo, but in
        // production use a stable ID per producer instance so restarted ("zombie")
        // producers can be fenced.
        properties.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, UUID.randomUUID().toString());
        // enable idempotence support
        properties.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, true);
        properties.put(ProducerConfig.ACKS_CONFIG, "all"); // acks: -1/all = all in-sync replicas, 1 = leader only, 0 = no acknowledgement
        properties.put(ProducerConfig.RETRIES_CONFIG, 5); // retry count
        properties.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, 3000); // request timeout
        //2. Create the Kafka producer
        KafkaProducer<String, String> producer = new KafkaProducer<>(properties);
        // initialize transactions
        producer.initTransactions();
        // begin the transaction
        producer.beginTransaction();
        try {
            //3. Build records and publish them
            for (int i = 50; i < 60; i++) {
                if (i == 56) {
                    int m = 1 / 0; // deliberately trigger an error
                }
                // non-null key: the partition is chosen by key hash
                // (with a null key, the producer falls back to round-robin partitioning)
                ProducerRecord<String, String> record = new ProducerRecord<>("t3", UUID.randomUUID().toString(), "Hello Kafka" + i);
                producer.send(record);
            }
            // commit the transaction
            producer.commitTransaction();
        } catch (Exception e) {
            e.printStackTrace();
            // abort the transaction
            producer.abortTransaction();
        } finally {
            //4. Release resources
            producer.flush();
            producer.close();
        }
    }
}
Consumer
// everything else is identical to the earlier consumer
// change the consumer's default transaction isolation level
properties.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed");
Consume-transform-produce transaction
public class ConsumeTransformProduceDemo {
public static void main(String[] args) {
//1. Create the consumer and producer from their configurations
KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(consumerConfig());
KafkaProducer<String, String> producer = new KafkaProducer<>(producerConfig());
//2. Subscribe the consumer to the source topic
consumer.subscribe(Arrays.asList("t6"));
//3. Transactional processing
producer.initTransactions();
while (true) {
producer.beginTransaction();
try {
ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(5));
Map<TopicPartition, OffsetAndMetadata> offsets = new HashMap<>();
for (ConsumerRecord<String, String> record : records) {
// business processing goes here
System.out.println(record.key() + "--->" + record.value());
producer.send(new ProducerRecord<String,String>("t7","t7:"+record.value()));
// simulate an error
// int m = 1/0;
// record the consumed position (next offset to read = current offset + 1)
offsets.put(new TopicPartition("t6",record.partition()),new OffsetAndMetadata(record.offset()+1));
}
// commit the consumed offsets to Kafka as part of the transaction
producer.sendOffsetsToTransaction(offsets,"g1");
// on success, commit the transaction
producer.commitTransaction();
} catch (Exception e) {
e.printStackTrace();
producer.abortTransaction();
}
}
}
public static Properties producerConfig() {
Properties properties = new Properties();
properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "Kafka01:9092,Kafka02:9092,Kafka03:9092");
properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
properties.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, UUID.randomUUID().toString());
properties.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, Boolean.TRUE);
properties.put(ProducerConfig.RETRIES_CONFIG, 5);
properties.put(ProducerConfig.ACKS_CONFIG, "all");
properties.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, 3000);
properties.put(ProducerConfig.BATCH_SIZE_CONFIG, 16384);
properties.put(ProducerConfig.LINGER_MS_CONFIG, 2000);
return properties;
}
public static Properties consumerConfig() {
Properties properties = new Properties();
properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "Kafka01:9092,Kafka02:9092,Kafka03:9092");
properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
properties.put(ConsumerConfig.GROUP_ID_CONFIG, "g1");
properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
properties.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed");
return properties;
}
}
Kafka Streams
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-streams</artifactId>
<version>2.2.0</version>
</dependency>
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.*;
import org.apache.kafka.streams.kstream.*;

import java.util.ArrayList;
import java.util.Properties;

public class WordCountApplication {
    public static void main(String[] args) {
        //1. Configure the streams application
        Properties properties = new Properties();
        properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "Kafka01:9092,Kafka02:9092,Kafka03:9092");
        properties.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        properties.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        properties.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-highlevel-application");
        properties.put(StreamsConfig.NUM_STREAM_THREADS_CONFIG, 3);
        //2. Wire up the processing topology
        StreamsBuilder sb = new StreamsBuilder();
        KStream<String, String> stream = sb.stream("t10");
        KTable<String, Long> kTable = stream
                // null hello
                // null world
                .flatMap((key, value) -> {
                    String[] words = value.toLowerCase().split(" ");
                    ArrayList<KeyValue<String, String>> list = new ArrayList<>();
                    for (String word : words) {
                        list.add(new KeyValue<>(key, word));
                    }
                    return list;
                })
                .map((k, v) -> new KeyValue<String, Long>(v, 1L))
                .groupByKey(Grouped.with(Serdes.String(), Serdes.Long()))
                .count();
        kTable.toStream().to("t11", Produced.with(Serdes.String(), Serdes.Long()));
        Topology topology = sb.build();
        // print the automatically generated topology
        System.out.println(topology.describe().toString());
        //3. Create the streams application
        KafkaStreams kafkaStreams = new KafkaStreams(topology, properties);
        //4. Start it
        kafkaStreams.start();
    }
}
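To exercise the application (a sketch; topics t10 and t11 must exist), feed sentences into t10 with the console producer and read the counts from t11, telling the console consumer to print keys and deserialize values as longs:
[root@Kafka01 kafka_2.11-2.2.0]# bin/kafka-console-producer.sh --broker-list Kafka01:9092 --topic t10
[root@Kafka01 kafka_2.11-2.2.0]# bin/kafka-console-consumer.sh --bootstrap-server Kafka01:9092 --topic t11 --property print.key=true --property value.deserializer=org.apache.kafka.common.serialization.LongDeserializer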
Operators
Stateless operators
Stateless operators transform data without maintaining any state.
- Branch
KStream → KStream[]
KStream<String, String>[] kStreams = stream.branch(
        (k, v) -> v.startsWith("A"), // records starting with "A"
        (k, v) -> true);             // everything else
kStreams[0].foreach((k, v) -> System.out.println(k + "\t" + v));
- Filter
KStream → KStream
Keeps only the records for which the predicate returns true:
stream.filter((k, v) -> v.startsWith("H"))
      .foreach((k, v) -> System.out.println(k + "\t" + v));
- filterNot
KStream → KStream
KTable → KTable
Keeps only the records for which the predicate returns false:
stream.filterNot((k, v) -> v.startsWith("H"))
      .foreach((k, v) -> System.out.println(k + "\t" + v));
- FlatMap
KStream → KStream
Expands one record into 0..n records:
stream.flatMap((k, v) -> Arrays.asList(
              new KeyValue<String, String>(k, v.toUpperCase() + "!"),
              new KeyValue<String, String>(k, v.toLowerCase() + "?")))
      .foreach((k, v) -> System.out.println(k + "\t" + v));
- flatMapValues
KStream → KStream
Expands one record's value into 1..n new values; the key is unchanged:
stream // null Hello World
      .flatMapValues(v -> Arrays.asList(v.split(" ")))
      // null Hello
      // null World
      .foreach((k, v) -> System.out.println(k + "\t" + v));
- Foreach
KStream → void (terminal operation)
Iterates over the stream's records; returns nothing:
stream // null Hello World
      .flatMapValues(v -> Arrays.asList(v.split(" ")))
      // null Hello
      // null World
      .foreach((k, v) -> System.out.println(k + "\t" + v));
- GroupBy
KStream → KGroupedStream
Groups records by the given key selector; note that grouping shuffles (repartitions) the data:
stream // null Hello World
      .flatMapValues(v -> Arrays.asList(v.split(" ")))
      // null Hello
      // null World
      .groupBy((k, v) -> v)
      .count()
      .toStream()
      .foreach((k, v) -> System.out.println(k + "\t" + v));
- GroupByKey
KStream → KGroupedStream
Groups records by their existing key (also shuffles):
stream // null Hello World
      .flatMapValues(v -> Arrays.asList(v.split(" ")))
      // null Hello
      // null World
      .map((k, v) -> new KeyValue<String, Long>(v, 1L))
      .groupByKey(Grouped.with(Serdes.String(), Serdes.Long()))
      .count()
      .toStream()
      .foreach((k, v) -> System.out.println(k + "\t" + v));
- Map
KStream → KStream
Maps each record of the stream to another record (see mapValues below for an example).
- mapValues
KStream → KStream
Like map, but the key is unchanged; only the value may change:
stream // null Hello World
      .flatMapValues(v -> Arrays.asList(v.split(" ")))
      // null Hello
      // null World
      .map((k, v) -> new KeyValue<>(v, 1L))
      .mapValues(v -> v + 1)
      .foreach((k, v) -> System.out.println(k + "\t" + v));
- Merge
KStream → KStream
Merges two streams into one:
KStream<String, String>[] streams = stream.branch(
        (k, v) -> v.startsWith("A"),
        (k, v) -> v.startsWith("B"),
        (k, v) -> true);
streams[0].merge(streams[2])
          .foreach((k, v) -> System.out.println(k + "\t" + v));
- Peek
KStream → KStream
A probe for debugging: observes records without changing the stream:
stream.peek((k, v) -> System.out.println(k + "\t" + v));
- Print
KStream → void (terminal operation)
Equivalent to:
foreach((key, value) -> System.out.println(key + ", " + value))
stream.print(Printed.toSysOut());
- SelectKey
KStream → KStream
Assigns a new key to every record (the key changes, the value does not):
stream.selectKey((k, v) -> "Hello:").print(Printed.toSysOut());
- Table to Stream
KTable → KStream
table.toStream();
Stateful operators
Stateful operators maintain state across records, e.g. running aggregates.
- aggregate
KGroupedStream → KTable
Rolling aggregation: aggregates the values for each grouped key:
stream // null Hello Hello
      .flatMapValues(v -> Arrays.asList(v.split(" ")))
      // null Hello
      // null Hello
      .groupBy((k, v) -> v, Grouped.with(Serdes.String(), Serdes.String()))
      // Hello [Hello, Hello].length
      // Hello 2 + 0
      // first argument: initializer; second argument: aggregator (k: word, v: [])
      .aggregate(() -> 0L,
                 (k, v, aggs) -> aggs + 1L,
                 Materialized.with(Serdes.String(), Serdes.Long()))
      .toStream()
      .print(Printed.toSysOut());
- count
KGroupedStream → KTable
Rolling aggregation: counts the values for each grouped key:
stream // null Hello Hello
      .flatMapValues(v -> Arrays.asList(v.split(" ")))
      // null Hello
      // null Hello
      .groupBy((k, v) -> v, Grouped.with(Serdes.String(), Serdes.String()))
      .count()
      .toStream()
      .print(Printed.toSysOut());
- Reduce
KGroupedStream → KTable
Rolling aggregation: combines the value list for each grouped key:
stream // null Hello Hello
      .flatMapValues(v -> Arrays.asList(v.split(" ")))
      // null Hello
      // null Hello
      .map((k, v) -> new KeyValue<String, Long>(v, 1L))
      .groupByKey(Grouped.with(Serdes.String(), Serdes.Long()))
      // Hello [1, 1, 1]
      // World [1, 1, 1, 1]
      .reduce((v1, v2) -> {
          System.out.println(v1 + "\t" + v2);
          return v1 + v2;
      }, Materialized.with(Serdes.String(), Serdes.Long()))
      .toStream()
      .print(Printed.toSysOut());
Window
Tumbling
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.*;
import org.apache.kafka.streams.kstream.*;
import org.apache.kafka.streams.state.WindowStore;

import java.text.SimpleDateFormat;
import java.time.Duration;
import java.util.ArrayList;
import java.util.Properties;

public class KafkaStreamingWordCountWithWindow {
    public static void main(String[] args) {
        Properties properties = new Properties();
        properties.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount22");
        properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "Kafka01:9092");
        properties.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        properties.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        StreamsBuilder builder = new StreamsBuilder();
        // source topic of the stream
        KStream<String, String> kStream = builder.stream("input");
        kStream
                .flatMap((k, v) -> {
                    ArrayList<KeyValue<String, String>> keyValues = new ArrayList<>();
                    for (String word : v.split(" ")) {
                        keyValues.add(new KeyValue<>(k, word));
                    }
                    return keyValues;
                })
                .map((k, v) -> new KeyValue<String, Long>(v, 1L))
                .groupBy((k, v) -> k, Grouped.with(Serdes.String(), Serdes.Long()))
                // size of the tumbling window
                .windowedBy(TimeWindows.of(Duration.ofSeconds(10)))
                .reduce((value1, value2) -> value1 + value2,
                        Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("counts")
                                .withKeySerde(Serdes.String())
                                .withValueSerde(Serdes.Long()))
                .toStream()
                .peek((Windowed<String> key, Long value) -> {
                    Window window = key.window();
                    SimpleDateFormat sdf = new SimpleDateFormat("HH:mm:ss");
                    System.out.println(sdf.format(window.start()) + " ~ " + sdf.format(window.end())
                            + "\t" + key.key() + "\t" + value);
                });
        // build and start the streams application
        KafkaStreams kafkaStreams = new KafkaStreams(builder.build(), properties);
        kafkaStreams.start();
    }
}
Hopping
Duration windowSizeMs = Duration.ofMinutes(5);
Duration advanceMs = Duration.ofMinutes(1);
TimeWindows.of(windowSizeMs).advanceBy(advanceMs);
Sliding
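Kafka Streams 2.2 has no standalone sliding-window type; sliding windows appear in stream-stream joins via JoinWindows, e.g. (a sketch):
// records of the two joined streams match when their timestamps are within 5 minutes
JoinWindows.of(Duration.ofMinutes(5));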
Session
SessionWindows.with(Duration.ofMinutes(5));
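These window definitions plug into a grouped stream through windowedBy, just like the tumbling window above, e.g. (a fragment reusing the word-count pipeline; session gaps of 5 minutes are assumed):
.groupByKey(Grouped.with(Serdes.String(), Serdes.Long()))
.windowedBy(SessionWindows.with(Duration.ofMinutes(5)))
.count()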