kafka Streams实例-CSDN博客

本文主要描述kafka Streams的三个流实例
一. Pipe 二. line Split 三. word count

启动kafka服务
bin/zookeeper-server-start.sh config/zookeeper.properties
bin/kafka-server-start.sh config/server.properties
创建输入输出流

bin/kafka-topics.sh --create
--bootstrap-server localhost:9092
--replication-factor 1
--partitions 1
--topic streams-plaintext-input

bin/kafka-topics.sh --create
--bootstrap-server localhost:9092
--replication-factor 1
--partitions 1
--topic streams-wordcount-output
--config cleanup.policy=compact

利用JAVA IDE工具Idea 或者Eclipse创建一个maven项目。引入jar包

<dependency>
	<groupId>org.apache.kafka</groupId>
	<artifactId>kafka-clients</artifactId>
	<version>2.2.0</version>
</dependency>

<dependency>
	<groupId>org.apache.kafka</groupId>
	<artifactId>kafka-streams</artifactId>
	<version>2.2.0</version>
</dependency>
复制代码

所用的客户端版本与kafka服务器版本保持一致，本文采用的服务器版本是kafka_2.12-2.2.0

在新建的maven项目中新增Pipe处理类

import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.KafkaStreams;
import org.apache.kafka.streams.StreamsBuilder;
import org.apache.kafka.streams.StreamsConfig;
import org.apache.kafka.streams.Topology;

import java.util.Properties;
import java.util.concurrent.CountDownLatch;

public class Pipe {

    public static void main(String[] args) throws Exception {
        Properties props = new Properties();
        props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-pipe");
        props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
        props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

        final StreamsBuilder builder = new StreamsBuilder();

        builder.stream("streams-plaintext-input").to("streams-pipe-output");
        final Topology topology = builder.build();

        final KafkaStreams streams = new KafkaStreams(topology, props);
        final CountDownLatch latch = new CountDownLatch(1);

        // attach shutdown handler to catch control-c
        Runtime.getRuntime().addShutdownHook(new Thread("streams-shutdown-hook") {
            @Override
            public void run() {
                streams.close();
                latch.countDown();
            }
        });

        try {
            streams.start();
            latch.await();
        } catch (Throwable e) {
            System.exit(1);
        }
        System.exit(0);
    }
} 
复制代码

启动该main方法，然后在输入流输入，输出流就有对应的输出。

在新增的maven的项目中新增LineSplit处理类

 import org.apache.kafka.common.serialization.Serdes;
 import org.apache.kafka.streams.KafkaStreams;
 import org.apache.kafka.streams.StreamsBuilder;
 import org.apache.kafka.streams.StreamsConfig;
 import org.apache.kafka.streams.Topology;
 import org.apache.kafka.streams.kstream.KStream;
 
 import java.util.Arrays;
 import java.util.Properties;
 import java.util.concurrent.CountDownLatch;
 
 public class LineSplit {

 public static void main(String[] args) throws Exception {
     Properties props = new Properties();
     props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-linesplit");
     props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
     props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
     props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

     final StreamsBuilder builder = new StreamsBuilder();

     KStream<String, String> source = builder.stream("streams-plaintext-input");
     source.flatMapValues(value -> Arrays.asList(value.split("\\W+")))
             .to("streams-linesplit-output");

     final Topology topology = builder.build();
     final KafkaStreams streams = new KafkaStreams(topology, props);
     final CountDownLatch latch = new CountDownLatch(1);

     // attach shutdown handler to catch control-c
     Runtime.getRuntime().addShutdownHook(new Thread("streams-shutdown-hook") {
         @Override
         public void run() {
             streams.close();
             latch.countDown();
         }
     });

     try {
         streams.start();
         latch.await();
     } catch (Throwable e) {
         System.exit(1);
     }
     System.exit(0);
 }
复制代码

}

启动该main方法，输出流结果为

6.在新增的maven项目中新增word count处理类

    import org.apache.kafka.clients.consumer.ConsumerConfig;
    import org.apache.kafka.common.serialization.Serdes;
    import org.apache.kafka.streams.KafkaStreams;
    import org.apache.kafka.streams.StreamsBuilder;
    import org.apache.kafka.streams.StreamsConfig;
    import org.apache.kafka.streams.kstream.KStream;
    import org.apache.kafka.streams.kstream.KTable;
    import org.apache.kafka.streams.kstream.Produced;
    
    import java.util.Arrays;
    import java.util.Locale;
    import java.util.Properties;
    import java.util.concurrent.CountDownLatch;
    
    public final class WordCount {
    
        public static void main(final String[] args) {
            final Properties props = new Properties();
            props.put(StreamsConfig.APPLICATION_ID_CONFIG, "streams-wordcount");
            props.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "localhost:9092");
            props.put(StreamsConfig.CACHE_MAX_BYTES_BUFFERING_CONFIG, 0);
            props.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
            props.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass().getName());
    
            // setting offset reset to earliest so that we can re-run the demo code with the same pre-loaded data
            // Note: To re-run the demo, you need to use the offset reset tool:
            // https://cwiki.apache.org/confluence/display/KAFKA/Kafka+Streams+Application+Reset+Tool
            props.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
    
            final StreamsBuilder builder = new StreamsBuilder();
    
            final KStream<String, String> source = builder.stream("streams-plaintext-input");
    
            final KTable<String, Long> counts = source
                    .flatMapValues(value -> Arrays.asList(value.toLowerCase(Locale.getDefault()).split(" ")))
                    .groupBy((key, value) -> value)
                    .count();
    
            // need to override value serde to Long type
            counts.toStream().to("streams-wordcount-output", Produced.with(Serdes.String(), Serdes.Long()));
    
            final KafkaStreams streams = new KafkaStreams(builder.build(), props);
            final CountDownLatch latch = new CountDownLatch(1);
    
            // attach shutdown handler to catch control-c
            Runtime.getRuntime().addShutdownHook(new Thread("streams-wordcount-shutdown-hook") {
                @Override
                public void run() {
                    streams.close();
                    latch.countDown();
                }
            });
    
            try {
                streams.start();
                latch.await();
            } catch (final Throwable e) {
                System.exit(1);
            }
            System.exit(0);
        }
    }
复制代码

启动该main方法，查看输出结果命令： bin/kafka-console-consumer.sh --bootstrap-server localhost:9092
--topic streams-wordcount-output
--from-beginning
--formatter kafka.tools.DefaultMessageFormatter
--property print.key=true
--property print.value=true
--property key.deserializer=org.apache.kafka.common.serialization.StringDeserializer
--property value.deserializer=org.apache.kafka.common.serialization.LongDeserializer