目录
程序流程:
将txt数据源发送到topic1:TextLinesTopic
StreamBuilder 监听 topic1,并定制规则,输出到topic2:WordsWithCountsTopic
consumer从topic2中获取数据,并输出
整个过程中最核心的内容是根据业务需求制作处理规则
目录结构:
pom.xml
提供的kafka版本为2.1.0
<properties>
<project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
<junit>4.12</junit>
<kafka>2.1.0</kafka>
</properties><!-- kafka Producer、Consumer API -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-clients</artifactId>
<version>${kafka}</version>
</dependency><!-- kafka Stream API -->
<dependency>
<groupId>org.apache.kafka</groupId>
<artifactId>kafka-streams</artifactId>
<version>${kafka}</version>
</dependency>
流规则定义
package com.i2yun.kafka;
import java.util.Arrays;
import java.util.Properties;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;
import org.apache.kafka.streams.kstream.Materialized;
import org.apache.kafka.streams.kstream.Produced;
/**
* to run this demo, need to :<br>
*
* 1. start zookeeper and kafka
*
* <pre>
* nohup bin/zookeeper-server-start.sh config/zookeeper.properties >>zookeeper.out 2>&1 &
* nohup bin/kafka-server-start.sh config/server.properties >/dev/null 2>&1 &
* </pre>
*
* 2. create topic TextLinesTopic and WordsWithCountsTopic
*
* <pre>
* bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic TextLinesTopic
* bin/kafka-topics.sh --create --zookeeper localhost:2181 --replication-factor 1 --partitions 1 --topic WordsWithCountsTopic
* </pre>
*
* 3. make the data source file: doc/testFolder/wordCount.txt
*
* 4. run producer and consumer
*
* <pre>
* @see com.i2yun.kafka.mq.SimpleConsumer
* @see com.i2yun.kafka.mq.SimpleProducer
* </pre>
*
*
* @author i2kwing
*
*/
public class WordCountApplication {

    public static void main(final String[] args) throws Exception {
        Properties props = new Properties();
        // Kafka Streams requires at least "application.id" and "bootstrap.servers".
        // Each streams application has a unique id,
        // see http://kafka.apache.org/21/documentation/streams/tutorial
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-application");
        // Broker location. {@see CommonClientConfigs#BOOTSTRAP_SERVERS_DOC};
        // "ubuntu-02 192.168.78.132" must be mapped in the hosts file in advance.
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "ubuntu-02:9092");
        // Provide default serdes (serializer and deserializer) for keys and values.
        props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

        // Configure the processor topology with the DSL (Domain Specific Language).
        StreamsBuilder builder = new StreamsBuilder();
        // Create a source stream from the Kafka topic named TextLinesTopic.
        KStream<String, String> textLines = builder.stream("TextLinesTopic");
        KTable<String, Long> wordCounts = textLines
                // Split each line into lower-cased words; "\\W+" treats any run of
                // non-word characters as a separator.
                .flatMapValues(textLine -> Arrays.asList(textLine.toLowerCase().split("\\W+")))
                // Use the word itself as the record key so grouping counts per word.
                .selectKey((key, word) -> word)
                .groupByKey()
                // Count the number of records per key, backed by the state store "counts-store".
                .count(Materialized.as("counts-store"));
        // Write the changelog of counts to the topic WordsWithCountsTopic. The values
        // are Long, so a Long serde must be supplied explicitly (default is String).
        wordCounts.toStream().to("WordsWithCountsTopic", Produced.with(Serdes.String(), Serdes.Long()));

        // Inspect what kind of topology was created.
        Topology topology = builder.build();
        System.out.println(topology.describe());

        KafkaStreams streams = new KafkaStreams(topology, props);
        // Close the streams client gracefully (flush state stores, commit offsets)
        // when the JVM shuts down; without this hook the application could only be
        // killed without any cleanup.
        Runtime.getRuntime().addShutdownHook(new Thread(streams::close, "streams-shutdown-hook"));
        streams.start();
    }
}
生产数据
SimpleProducer:数据来源(生产数据:将文本文件中的每一行发送到指定topic)
package com.i2yun.kafka.mq;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.util.Properties;
import org.apache.kafka.clients.producer.KafkaProducer;
import org.apache.kafka.clients.producer.Producer;
import org.apache.kafka.clients.producer.ProducerRecord;
/**
 * Data source for the word-count demo: reads a text file line by line and
 * publishes each line as a message to the topic {@code TextLinesTopic}.
 */
public class SimpleProducer {

    public static void main(String[] args) throws Exception {
        // Topic the raw text lines are published to.
        String topicName = "TextLinesTopic";

        // Producer configuration, see http://kafka.apache.org/documentation/#producerapi
        Properties props = new Properties();
        // Broker address.
        props.put("bootstrap.servers", "192.168.78.132:9092");
        // Wait for the full set of in-sync replicas to acknowledge each record.
        props.put("acks", "all");
        // Number of automatic retries when a send request fails (0 = no retries).
        props.put("retries", 0);
        // Upper bound, in bytes, on the size of a record batch.
        props.put("batch.size", 16384);
        // Small artificial delay so that records can be batched together.
        props.put("linger.ms", 1);
        // Total amount of memory, in bytes, available to the producer for buffering.
        props.put("buffer.memory", 33554432);
        props.put("key.serializer", "org.apache.kafka.common.serialization.StringSerializer");
        props.put("value.serializer", "org.apache.kafka.common.serialization.StringSerializer");

        // Forward slashes are portable in Java (the original "doc\\testFolder\\..."
        // path only worked on Windows).
        File file = new File("doc/testFolder/wordCount.txt");
        // try-with-resources guarantees both the producer and the reader are closed
        // even if reading or sending throws; previously they leaked on failure.
        try (Producer<String, String> producer = new KafkaProducer<String, String>(props);
                BufferedReader reader = new BufferedReader(new FileReader(file))) {
            String line;
            while ((line = reader.readLine()) != null) {
                producer.send(new ProducerRecord<String, String>(topicName, line));
                // Slow down so the streams application's output is easy to observe.
                Thread.sleep(1000);
            }
            System.out.println("Message sent successfully");
        }
    }
}
消费数据
SimpleConsumer: 消费数据(从新的topic中消费数据:打印key-value)
package com.i2yun.kafka.mq;
import java.time.Duration;
import java.util.Arrays;
import java.util.Properties;
import org.apache.kafka.clients.consumer.ConsumerRecord;
import org.apache.kafka.clients.consumer.ConsumerRecords;
import org.apache.kafka.clients.consumer.KafkaConsumer;
/**
 * Consumer for the word-count demo: reads (word, count) records from the topic
 * {@code WordsWithCountsTopic} and prints offset, key and value for each record.
 */
public class SimpleConsumer {

    public static void main(String[] args) throws Exception {
        // Topic that the streams application writes the word counts to.
        String topicName = "WordsWithCountsTopic";

        // Kafka consumer configuration settings.
        Properties props = new Properties();
        props.put("bootstrap.servers", "192.168.78.132:9092");
        props.put("group.id", "test");
        // Commit consumed offsets automatically, once per second.
        props.put("enable.auto.commit", "true");
        props.put("auto.commit.interval.ms", "1000");
        props.put("session.timeout.ms", "30000");
        props.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
        props.put("value.deserializer", "org.apache.kafka.common.serialization.LongDeserializer");

        // The value deserializer is LongDeserializer, so the consumer's value type
        // must be Long. The original KafkaConsumer<String, String> would throw a
        // ClassCastException as soon as record.value() was accessed.
        KafkaConsumer<String, Long> kafkaConsumer = new KafkaConsumer<String, Long>(props);
        // Subscribe to the list of topics.
        kafkaConsumer.subscribe(Arrays.asList(topicName));
        while (true) {
            ConsumerRecords<String, Long> records = kafkaConsumer.poll(Duration.ofMillis(100));
            for (ConsumerRecord<String, Long> record : records) {
                // Print the offset, key and value for each consumed record.
                System.out.printf("offset = %d, key = %s, value = %s\n", record.offset(), record.key(), record.value());
            }
        }
    }
}
数据源
wordCount.txt
Properties KStream KTable lambda
KStream KTable lambda lambda
KStream
Properties KStream KTable lambda
KStream KTable lambda lambda
KStream
Properties KStream KTable lambda
KStream KTable lambda lambda
KStream
Properties KStream KTable lambda
KStream KTable lambda lambda
KStream
Properties KStream KTable lambda
KStream KTable lambda lambda
KStream
Properties KStream KTable lambda
KStream KTable lambda lambda
KStream
输出内容:由于多次修改wordCount.txt并多次运行producer,而topic中的计数结果是持续累加保存的,因此以下内容与第一次运行时不一致,这是正确的
offset = 329, key = properties, value = 14
offset = 330, key = ktable, value = 28
offset = 331, key = lambda, value = 42
offset = 332, key = kstream, value = 42