Kafka Streams WordCount Example
Code example
import org.apache.kafka.common.serialization.Serde;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.common.utils.Bytes;
import org.apache.kafka.streams.kstream.Consumed;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.kstream.Produced;
import org.apache.kafka.streams.state.KeyValueStore;
import java.util.Arrays;
import java.util.Properties;
public class WordCountApplication {

    public static void main(final String[] args) {
        // Kafka Streams configuration
        Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-application");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.137.201:9092");
        props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

        // Serdes for (de)serializing keys and values
        final Serde<String> stringSerde = Serdes.String();
        final Serde<Long> longSerde = Serdes.Long();

        // Build the processing topology with the Streams DSL
        StreamsBuilder builder = new StreamsBuilder();

        // Read the source topic as a stream of text lines
        KStream<String, String> textLines = builder.stream("streams-plaintext-input", Consumed.with(stringSerde, stringSerde));

        KTable<String, Long> wordCounts = textLines
                // Split each line into words on non-word characters
                .flatMapValues(textLine -> Arrays.asList(textLine.toLowerCase().split("\\W+")))
                // Re-key each record by the word itself
                .groupBy((key, word) -> word)
                // Count occurrences of each word
                .count();

        // Write the changelog of counts to the output topic
        wordCounts.toStream().to("streams-wordcount-output", Produced.with(stringSerde, longSerde));

        KafkaStreams streams = new KafkaStreams(builder.build(), props);
        streams.start();

        // Close the Streams instance cleanly on JVM shutdown
        Runtime.getRuntime().addShutdownHook(new Thread(streams::close));
    }
}
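The Materialized, KeyValueStore and Bytes imports above are only needed if the count is materialized into a named local state store, which also makes it queryable via interactive queries. As a minimal variant of the counting step using the standard Kafka Streams API (the store name "counts-store" is just an example):

        KTable<String, Long> wordCounts = textLines
                // Same aggregation, but materialized into a state store named "counts-store"
                .flatMapValues(textLine -> Arrays.asList(textLine.toLowerCase().split("\\W+")))
                .groupBy((key, word) -> word)
                .count(Materialized.<String, Long, KeyValueStore<Bytes, byte[]>>as("counts-store"));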
Create the input topic and start a console producer
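The console producer below writes to streams-plaintext-input. If the broker is not configured to auto-create topics, create it first. The kafka-topics.sh invocation here is a sketch: it assumes a broker recent enough to accept --bootstrap-server (older releases use --zookeeper), and single-partition, single-replica settings suitable only for a local test:

bin/kafka-topics.sh --create --bootstrap-server localhost:9092 \
--replication-factor 1 --partitions 1 \
--topic streams-plaintext-input

Then start a console producer on that topic: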
bin/kafka-console-producer.sh --broker-list localhost:9092 --topic streams-plaintext-input
Create the output topic and start a console consumer
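The output topic can be created the same way, under the same assumptions about the broker version and the local single-broker setup:

bin/kafka-topics.sh --create --bootstrap-server localhost:9092 \
--replication-factor 1 --partitions 1 \
--topic streams-wordcount-output

Then start a console consumer that prints both the key (the word, a String) and the value (the count, a Long):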
bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 \
--topic streams-wordcount-output \
--from-beginning \
--formatter kafka.tools.DefaultMessageFormatter \
--property print.key=true \
--property print.value=true \
--property key.deserializer=org.apache.kafka.common.serialization.StringDeserializer \
--property value.deserializer=org.apache.kafka.common.serialization.LongDeserializer
Run the application
I edited and ran this in IDEA: change the broker address to match your environment and run the main class directly. If it starts without errors, the topology is up and waiting for input.
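Outside the IDE, the only required dependency is the org.apache.kafka:kafka-streams artifact. As a sketch, assuming the exec-maven-plugin is configured in the pom.xml (configuration not shown here), the class can also be compiled and run with Maven:

mvn compile exec:java -Dexec.mainClass=WordCountApplication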
Data flow
Produce some input
bin/kafka-console-producer.sh --broker-list localhost:9092 --topic streams-plaintext-input
all streams lead to kafka
Consume the output
bin/kafka-console-consumer.sh --bootstrap-server localhost:9092 \
--topic streams-wordcount-output \
--from-beginning \
--formatter kafka.tools.DefaultMessageFormatter \
--property print.key=true \
--property print.value=true \
--property key.deserializer=org.apache.kafka.common.serialization.StringDeserializer \
--property value.deserializer=org.apache.kafka.common.serialization.LongDeserializer
all 1
streams 1
lead 1
to 1
kafka 1
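Because wordCounts is a KTable, each additional input line only produces updated counts for the words it contains. For example, sending one more line through the producer:

hello kafka streams

eventually makes the consumer print updated counts such as:

hello 1
kafka 2
streams 2

The exact moment each update appears depends on the commit interval and record caching in the Streams application.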