在idea上创建一个maven工程
然后在pom.xml文件里增加kafka-streams的依赖包:
<dependency> <groupId>org.apache.kafka</groupId> <artifactId>kafka-streams</artifactId> <version>2.2.2</version> </dependency>
sum
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.*;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;
import java.util.Properties;
public class SumStreamDemo {
    /**
     * Kafka Streams demo: reads numeric strings from the "mystreamin" topic,
     * keeps a single running sum under the constant key "sum", and writes each
     * updated total to the "mystreamout" topic.
     */
    public static void main(String[] args) {
        Properties prop = new Properties();
        prop.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount");
        prop.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.26.128:9092");
        // Streams manages offset commits itself; disable consumer auto-commit.
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        // Valid values: earliest / latest / none.
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");
        prop.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        prop.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

        // Build the stream processing topology.
        StreamsBuilder builder = new StreamsBuilder();
        // Abstraction over the unbounded record stream arriving on "mystreamin", e.g. [4, 5, 6].
        KStream<Object, Object> source = builder.stream("mystreamin");
        // KTable is a changelog abstraction: it holds the latest value per key.
        KTable<String, String> sum = source
                // Re-key every record to the constant "sum" so all values aggregate together.
                .map((key, value) -> new KeyValue<>("sum", value.toString()))
                .groupByKey() // [sum, (4, 5, 6)]
                .reduce((x, y) -> {
                    // parseInt + primitive int avoids the Integer autoboxing of
                    // Integer.valueOf; trim() tolerates stray whitespace from the
                    // console producer, which would otherwise throw NumberFormatException.
                    int total = Integer.parseInt(x.trim()) + Integer.parseInt(y.trim());
                    System.out.println(x + " + " + y + " = " + total);
                    return String.valueOf(total);
                });
        sum.toStream().to("mystreamout");

        Topology topology = builder.build();
        KafkaStreams kafkaStreams = new KafkaStreams(topology, prop);
        // Close the streams client on JVM shutdown (Ctrl+C) so state stores are
        // flushed and the app leaves its consumer group cleanly.
        Runtime.getRuntime().addShutdownHook(new Thread(kafkaStreams::close));
        kafkaStreams.start();
    }
}
wordcount
import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.*;
import org.apache.kafka.streams.kstream.KStream;
import org.apache.kafka.streams.kstream.KTable;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;
public class WordCountDemo {
    /**
     * Kafka Streams demo: splits each value read from the "mystreamin" topic
     * into whitespace-separated words, counts occurrences per word, and prints
     * every updated count to stdout.
     */
    public static void main(String[] args) {
        Properties prop = new Properties();
        prop.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount2");
        prop.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.26.128:9092");
        // Streams manages offset commits itself; disable consumer auto-commit.
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false");
        // Valid values: earliest / latest / none.
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "latest");
        prop.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        prop.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

        // Build the stream processing topology.
        StreamsBuilder builder = new StreamsBuilder();
        // Abstraction over the unbounded record stream arriving on "mystreamin".
        KStream<Object, Object> source = builder.stream("mystreamin");
        // KTable is a changelog abstraction: latest count per word.
        KTable<String, Long> wordCounts = source
                // One input line -> one downstream record per word.
                .flatMapValues(value -> Arrays.asList(value.toString().split("\\s+")))
                // Re-key each record by the word itself; count() ignores the value.
                .map((key, word) -> new KeyValue<>(word, "1"))
                .groupByKey()
                .count();
        // Print each updated count to stdout instead of writing to an output topic.
        wordCounts.toStream().foreach((word, count) -> System.out.println(word + " " + count));

        Topology topology = builder.build();
        KafkaStreams kafkaStreams = new KafkaStreams(topology, prop);
        // Close the streams client on JVM shutdown (Ctrl+C) so state stores are
        // flushed and the app leaves its consumer group cleanly.
        Runtime.getRuntime().addShutdownHook(new Thread(kafkaStreams::close));
        kafkaStreams.start();
    }
}
我们先在kafka环境里创建我们的输入topic跟输出topic
在master端口分别输入:
kafka-console-producer.sh --broker-list gdp:9092 --topic mystreamin
kafka-console-consumer.sh --bootstrap-server gdp:9092 --topic mystreamout --from-beginning
然后在producer下输入一些字母,可以在consumer端看到stream的处理结果。
做完,可以按 Ctrl + C 关闭生产者跟消费者。