Kafka

Setup

Prerequisites

  • Clock synchronization across all nodes of the cluster

  • JDK 1.8 or later

  • A ZooKeeper cluster up and running
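
The configuration and commands below address the brokers by host name (Kafka01, Kafka02, Kafka03), so those names must resolve on every node. A minimal /etc/hosts sketch (the IP addresses are placeholders for your own):

192.168.100.11 Kafka01
192.168.100.12 Kafka02
192.168.100.13 Kafka03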

Installation

[root@Kafka0* ~]# tar -zxf kafka_2.11-2.2.0.tgz -C /usr

Configuration

Edit the core Kafka configuration file server.properties

[root@Kafka01 kafka_2.11-2.2.0]# vi config/server.properties
broker.id=0 | 1 | 2                       # unique per broker: 0 on Kafka01, 1 on Kafka02, 2 on Kafka03
listeners=PLAINTEXT://Kafka01|2|3:9092    # one per broker: Kafka01, Kafka02, or Kafka03
log.dirs=/usr/kafka_2.11-2.2.0/data
zookeeper.connect=Kafka01:2181,Kafka02:2181,Kafka03:2181

Starting and stopping the service

[root@Kafka0* kafka_2.11-2.2.0]# bin/kafka-server-start.sh config/server.properties
[root@Kafka0* kafka_2.11-2.2.0]# bin/kafka-server-stop.sh
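
To run the broker in the background, kafka-server-start.sh also accepts a -daemon flag, and once every broker is up you can sanity-check their registration with the zookeeper-shell tool that ships with Kafka (optional, shown here only as a quick verification):

[root@Kafka0* kafka_2.11-2.2.0]# bin/kafka-server-start.sh -daemon config/server.properties
[root@Kafka01 kafka_2.11-2.2.0]# bin/zookeeper-shell.sh Kafka01:2181 ls /brokers/ids

Once all three brokers are running, the last command should list the ids [0, 1, 2].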

Basic usage

Shell

  • Create a topic
[root@Kafka01 kafka_2.11-2.2.0]# bin/kafka-topics.sh --bootstrap-server Kafka01:9092,Kafka02:9092,Kafka03:9092 --topic t1 --partitions 3 --replication-factor 3 --create
  • List topics
[root@Kafka01 kafka_2.11-2.2.0]# bin/kafka-topics.sh --bootstrap-server Kafka01:9092,Kafka02:9092,Kafka03:9092  --list
  • Delete a topic
[root@Kafka02 kafka_2.11-2.2.0]# bin/kafka-topics.sh --bootstrap-server Kafka01:9092,Kafka02:9092,Kafka03:9092  --delete --topic t2
  • Describe a topic
[root@Kafka02 kafka_2.11-2.2.0]# bin/kafka-topics.sh --bootstrap-server Kafka01:9092,Kafka02:9092,Kafka03:9092  --describe --topic t1
  • Alter a topic (increase the partition count)
[root@Kafka02 kafka_2.11-2.2.0]# bin/kafka-topics.sh --bootstrap-server Kafka01:9092,Kafka02:9092,Kafka03:9092  --alter --topic t1 --partitions 5
  • Produce (console producer)
[root@Kafka01 kafka_2.11-2.2.0]# bin/kafka-console-producer.sh --broker-list Kafka01:9092,Kafka02:9092,Kafka03:9092 --topic t1
  • Consume (console consumer)
[root@Kafka01 kafka_2.11-2.2.0]# bin/kafka-console-consumer.sh --topic t1 --bootstrap-server Kafka01:9092,Kafka02:9092,Kafka03:9092
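  • Consume from the beginning (the --from-beginning flag replays records that were published before the consumer started)
[root@Kafka01 kafka_2.11-2.2.0]# bin/kafka-console-consumer.sh --topic t1 --bootstrap-server Kafka01:9092,Kafka02:9092,Kafka03:9092 --from-beginning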

Java

Dependency

<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka-clients</artifactId>
    <version>2.2.0</version>
</dependency>

Producer

public class ProducerDemo {

    public static void main(String[] args) {
        //1. Prepare the Kafka producer configuration
        Properties properties = new Properties();
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG,"Kafka01:9092,Kafka02:9092,Kafka03:9092");
        // String serializers (Object ---> byte[])
        properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,StringSerializer.class);

        //2. Create the Kafka producer
        KafkaProducer<String, String> producer = new KafkaProducer<String, String>(properties);

        //3. Build a record and publish it
        ProducerRecord<String, String> record = new ProducerRecord<String, String>("t2", UUID.randomUUID().toString(),"Hello Kafka");

        producer.send(record);

        //4. Release resources
        producer.flush();
        producer.close();
    }
}
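
send() is asynchronous. To check whether a record was actually written, a callback can be passed as the second argument; a small sketch based on the producer above:

producer.send(record, (metadata, exception) -> {
    if (exception != null) {
        // the send failed
        exception.printStackTrace();
    } else {
        // metadata describes where the record landed
        System.out.println("written to " + metadata.topic() + "-" + metadata.partition() + "@" + metadata.offset());
    }
});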

Consumer

public class ConsumerDemo {
    public static void main(String[] args) {
        //1. Specify the Kafka consumer configuration
        Properties properties = new Properties();
        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "Kafka01:9092,Kafka02:9092,Kafka03:9092");
        // Deserializers (byte[] ---> Object)
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        // A consumer group must be specified
        properties.put(ConsumerConfig.GROUP_ID_CONFIG, "group1");

        //2. Create the Kafka consumer
        KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(properties);

        //3. Subscribe to the topic
        consumer.subscribe(Arrays.asList("t2"));

        //4. Poll for newly produced records
        while (true) {
            ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(10));
            for (ConsumerRecord<String, String> record : records) {
                System.out.println(record.key() + "\t" + record.value() + "\t"
                        + record.topic() + "\t" + record.offset()
                        + "\t" + record.timestamp() + "\t" + record.partition());
            }
        }
    }
}
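
The poll loop above never terminates. A common way to stop such a consumer cleanly is to call wakeup() from a JVM shutdown hook, which makes the blocking poll() throw a WakeupException so the consumer can be closed; a sketch of the pattern (not part of the original demo):

// register before entering the poll loop
final Thread mainThread = Thread.currentThread();
Runtime.getRuntime().addShutdownHook(new Thread(() -> {
    consumer.wakeup();                                  // interrupts the blocking poll()
    try { mainThread.join(); } catch (InterruptedException ignored) { }
}));

try {
    while (true) {
        ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(10));
        // ... process records as above ...
    }
} catch (WakeupException e) {
    // expected during shutdown, nothing to do
} finally {
    consumer.close();
}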

Advanced topics

Offset control

  • Offset reset policy:
properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG,"earliest");
  • Auto-commit policy:
// By default the consumed offsets are committed automatically
properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,true);
// By default offsets are committed every 5 seconds
properties.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG,5000);
  • In most cases the consumed offsets should be committed manually (see the sketch after this block):
// Disable automatic offset commits
properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG,false);

// Commit the consumed offsets manually
consumer.commitSync();
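
commitSync() with no arguments commits the offsets returned by the most recent poll(). Offsets can also be committed per partition, which gives finer control when records are processed individually; a sketch (variable names are illustrative):

Map<TopicPartition, OffsetAndMetadata> offsets = new HashMap<>();
for (ConsumerRecord<String, String> record : records) {
    // process the record, then remember the next offset to be read
    offsets.put(new TopicPartition(record.topic(), record.partition()),
            new OffsetAndMetadata(record.offset() + 1));
}
consumer.commitSync(offsets);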

Consumption modes

Subscribe

// Subscribe to (consume) all partitions of the topic
consumer.subscribe(Arrays.asList("t3"));

Assign specific partitions

// Consume only a specific partition of the topic
consumer.assign(Arrays.asList(new TopicPartition("t3",0)));

Reset the consumption position

consumer.assign(Arrays.asList(new TopicPartition("t3",0)));
// Seek to offset 1 of the assigned partition
consumer.seek(new TopicPartition("t3",0),1);
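
Besides seeking to an explicit offset, the consumer can jump to the start or the end of its assigned partitions; a sketch using the same partition as above:

TopicPartition tp = new TopicPartition("t3", 0);
consumer.assign(Arrays.asList(tp));

consumer.seekToBeginning(Arrays.asList(tp));   // re-read from the earliest available offset
consumer.seekToEnd(Arrays.asList(tp));         // skip ahead to the latest offset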

Custom objects

Dependency

<dependency>
    <groupId>commons-lang</groupId>
    <artifactId>commons-lang</artifactId>
    <version>2.4</version>
</dependency>

Codec class

/**
 * Serializer/deserializer (codec) for custom objects
 */
public class ObjectCodec implements Serializer, Deserializer {

    /**
     * bytes[] ---> Object
     * @param s
     * @param bytes
     * @return
     */
    @Override
    public Object deserialize(String s, byte[] bytes) {
        return SerializationUtils.deserialize(bytes);
    }

    @Override
    public void configure(Map map, boolean b) {

    }

    /**
     * Object ---> bytes[]
     * @param s
     * @param o
     * @return
     */
    @Override
    public byte[] serialize(String s, Object o) {
        return SerializationUtils.serialize((Serializable) o);
    }

    @Override
    public void close() {
    }
}
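
Because ObjectCodec delegates to commons-lang's SerializationUtils, the value class must implement java.io.Serializable. A minimal sketch of the User class used in the test below (field names are assumptions inferred from the constructor call):

import java.io.Serializable;
import java.util.Date;

public class User implements Serializable {

    private Integer id;
    private String name;
    private Date birthday;

    public User(Integer id, String name, Date birthday) {
        this.id = id;
        this.name = name;
        this.birthday = birthday;
    }

    @Override
    public String toString() {
        return "User{id=" + id + ", name='" + name + "', birthday=" + birthday + "}";
    }
}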

Test

  • Producer
public class ProducerDemo {

    public static void main(String[] args) {
        //1. Prepare the Kafka producer configuration
        Properties properties = new Properties();
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG,"Kafka01:9092,Kafka02:9092,Kafka03:9092");
        // key serializer: String; value serializer: the custom ObjectCodec (Object ---> byte[])
        properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,ObjectCodec.class);

        //2. Create the Kafka producer
        KafkaProducer<String, User> producer = new KafkaProducer<String, User>(properties);

        //3. Build records and publish them

        for (int i = 1; i < 10; i++) {
            // non-null key: the partition is chosen by hashing the key
            // (a null key would fall back to round-robin partitioning)
            ProducerRecord<String, User> record = new ProducerRecord<String, User>("t4", UUID.randomUUID().toString(),
                    new User(i,"zs:"+i,new Date()));
            producer.send(record);
        }

        //4. Release resources
        producer.flush();
        producer.close();
    }
}
  • Consumer
public class ConsumerDemo {
    public static void main(String[] args) throws InterruptedException {
        //1. Specify the Kafka consumer configuration
        Properties properties = new Properties();
        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "Kafka01:9092,Kafka02:9092,Kafka03:9092");
        // Deserializers (byte[] ---> Object)
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, ObjectCodec.class);
        // Note: this setting changes the default offset reset behavior
        properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

        // Disable automatic offset commits
        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
        //properties.put(ConsumerConfig.AUTO_COMMIT_INTERVAL_MS_CONFIG,5000);

        // A consumer group must be specified
        properties.put(ConsumerConfig.GROUP_ID_CONFIG, "group1");


        //2. Create the Kafka consumer
        KafkaConsumer<String, User> consumer = new KafkaConsumer<String, User>(properties);

        //3. Subscribe to the topic
        consumer.subscribe(Arrays.asList("t4"));

        //4. Poll for newly produced records
        while (true) {
            ConsumerRecords<String, User> records = consumer.poll(Duration.ofSeconds(10));
            for (ConsumerRecord<String, User> record : records) {
                User user = record.value();
                System.out.println(user);
            }
            // Commit the consumed offsets manually
            consumer.commitSync();
        }
    }
}

Producer batching

Usage

properties.put(ProducerConfig.BATCH_SIZE_CONFIG,16384);  // max batch size in bytes per partition
properties.put(ProducerConfig.LINGER_MS_CONFIG,2000);    // wait up to 2s for more records before sending a batch

Integrating Kafka with Spring Boot

Dependencies

<dependencies>
     <dependency>
         <groupId>org.springframework.boot</groupId>
         <artifactId>spring-boot-starter-web</artifactId>
     </dependency>
     <dependency>
         <groupId>org.springframework.kafka</groupId>
         <artifactId>spring-kafka</artifactId>
     </dependency>

     <dependency>
         <groupId>org.springframework.boot</groupId>
         <artifactId>spring-boot-starter-test</artifactId>
         <scope>test</scope>
     </dependency>
     <dependency>
         <groupId>org.springframework.kafka</groupId>
         <artifactId>spring-kafka-test</artifactId>
         <scope>test</scope>
     </dependency>
</dependencies>

Configuration file

spring.kafka.bootstrap-servers= Kafka01:9092,Kafka02:9092,Kafka03:9092
spring.kafka.consumer.group-id=g1
spring.kafka.producer.key-serializer=org.apache.kafka.common.serialization.StringSerializer
spring.kafka.producer.value-serializer=org.apache.kafka.common.serialization.StringSerializer
spring.kafka.consumer.key-deserializer=org.apache.kafka.common.serialization.StringDeserializer
spring.kafka.consumer.value-deserializer=org.apache.kafka.common.serialization.StringDeserializer

Producer

@Component
public class KafkaProducerDemo {

    @Autowired
    private KafkaTemplate<String,String> template;

    // Scheduled task that sends a record periodically
    // cron: second minute hour day-of-month month day-of-week (year omitted)
    @Scheduled(cron = "0/10 * * * * ?")
    public void send(){
        template.send("t5", UUID.randomUUID().toString(),"Hello Kafka");
        //System.out.println(new Date());
    }
}
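
For the @Scheduled task above to run, scheduling must be enabled on a configuration class. A minimal sketch of the application class (the class name is illustrative):

import org.springframework.boot.SpringApplication;
import org.springframework.boot.autoconfigure.SpringBootApplication;
import org.springframework.scheduling.annotation.EnableScheduling;

@SpringBootApplication
@EnableScheduling
public class KafkaSpringBootApplication {

    public static void main(String[] args) {
        SpringApplication.run(KafkaSpringBootApplication.class, args);
    }
}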

Consumer

@Component
public class KafkaConsumerDemo {

    @KafkaListener(topics = "t5")
    public void receive(ConsumerRecord<String, String> record) {
        System.out.println(record.key() + "\t" + record.value());
    }
}

Producer idempotence

properties.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG,true); // enable idempotent producer support
properties.put(ProducerConfig.ACKS_CONFIG,"all");  // acks: -1/all = all in-sync replicas, 1 = leader only, 0 = no acknowledgement
properties.put(ProducerConfig.RETRIES_CONFIG,5);   // number of retries
properties.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, 3000); // request timeout in ms

Kafka transactions

Producer

public class ProducerDemo {

    public static void main(String[] args) {
        //1. Prepare the Kafka producer configuration
        Properties properties = new Properties();
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG,"Kafka01:9092,Kafka02:9092,Kafka03:9092");
        // String serializers (Object ---> byte[])
        properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG,StringSerializer.class);

        // Transactional id, must be unique
        properties.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG,UUID.randomUUID().toString());
        // Enable idempotent producer support
        properties.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG,true);
        properties.put(ProducerConfig.ACKS_CONFIG,"all");  // acks: -1/all = all in-sync replicas, 1 = leader only, 0 = no acknowledgement
        properties.put(ProducerConfig.RETRIES_CONFIG,5);   // number of retries
        properties.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, 3000); // request timeout in ms

        //2. Create the Kafka producer
        KafkaProducer<String, String> producer = new KafkaProducer<String, String>(properties);

        // Initialize transactions
        producer.initTransactions();

        // Begin the transaction
        producer.beginTransaction();

        try {
            //3. Build records and publish them
            for (int i = 50; i < 60; i++) {
                if(i == 56) {
                    int m = 1/0; // deliberately trigger an error
                }
                // non-null key: the partition is chosen by hashing the key
                // (a null key would fall back to round-robin partitioning)
                ProducerRecord<String, String> record = new ProducerRecord<String, String>("t3", UUID.randomUUID().toString(),"Hello Kafka"+i);
                producer.send(record);
            }
            // Commit the transaction
            producer.commitTransaction();
        } catch (Exception e) {
            e.printStackTrace();
            // Abort the transaction
            producer.abortTransaction();
        } finally {
            //4. Release resources
            producer.flush();
            producer.close();
        }
    }
}

Consumer

// The rest of the code is identical to the earlier consumer
// Change the consumer's default transaction isolation level
properties.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG,"read_committed");

Consume-transform-produce transaction

public class ConsumeTransformProduceDemo {

    public static void main(String[] args) {
        //1. Create the consumer and producer from their configurations
        KafkaConsumer<String, String> consumer = new KafkaConsumer<String, String>(consumerConfig());
        KafkaProducer<String, String> producer = new KafkaProducer<>(producerConfig());

        //2. Subscribe the consumer to the source topic
        consumer.subscribe(Arrays.asList("t6"));

        //3. Transactional processing
        producer.initTransactions();

        while (true) {
            producer.beginTransaction();
            try {
                ConsumerRecords<String, String> records = consumer.poll(Duration.ofSeconds(5));
                Map<TopicPartition, OffsetAndMetadata> offsets = new HashMap<>();
                for (ConsumerRecord<String, String> record : records) {
                    // Business processing goes here
                    System.out.println(record.key() + "--->" + record.value());
                    producer.send(new ProducerRecord<String,String>("t7","t7:"+record.value()));
                    // Simulate an error
                    // int m = 1/0;
                    // Record the consumed position in the map
                    offsets.put(new TopicPartition("t6",record.partition()),new OffsetAndMetadata(record.offset()+1));
                }
                // Commit the consumed offsets as part of the transaction
                producer.sendOffsetsToTransaction(offsets,"g1");

                // On success, commit the transaction
                producer.commitTransaction();
            } catch (Exception e) {
                e.printStackTrace();
                producer.abortTransaction();
            }
        }
    }

    public static Properties producerConfig() {
        Properties properties = new Properties();
        properties.put(ProducerConfig.BOOTSTRAP_SERVERS_CONFIG, "Kafka01:9092,Kafka02:9092,Kafka03:9092");
        properties.put(ProducerConfig.KEY_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        properties.put(ProducerConfig.VALUE_SERIALIZER_CLASS_CONFIG, StringSerializer.class);
        properties.put(ProducerConfig.TRANSACTIONAL_ID_CONFIG, UUID.randomUUID().toString());
        properties.put(ProducerConfig.ENABLE_IDEMPOTENCE_CONFIG, Boolean.TRUE);
        properties.put(ProducerConfig.RETRIES_CONFIG, 5);
        properties.put(ProducerConfig.ACKS_CONFIG, "all");
        properties.put(ProducerConfig.REQUEST_TIMEOUT_MS_CONFIG, 3000);
        properties.put(ProducerConfig.BATCH_SIZE_CONFIG, 16384);
        properties.put(ProducerConfig.LINGER_MS_CONFIG, 2000);
        return properties;
    }

    public static Properties consumerConfig() {
        Properties properties = new Properties();
        properties.put(ConsumerConfig.BOOTSTRAP_SERVERS_CONFIG, "Kafka01:9092,Kafka02:9092,Kafka03:9092");
        properties.put(ConsumerConfig.KEY_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        properties.put(ConsumerConfig.VALUE_DESERIALIZER_CLASS_CONFIG, StringDeserializer.class);
        properties.put(ConsumerConfig.GROUP_ID_CONFIG, "g1");
        properties.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");
        properties.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, false);
        properties.put(ConsumerConfig.ISOLATION_LEVEL_CONFIG, "read_committed");
        return properties;
    }
}

Kafka Streaming

<dependency>
    <groupId>org.apache.kafka</groupId>
    <artifactId>kafka-streams</artifactId>
    <version>2.2.0</version>
</dependency>
public class WordCountApplication {

    public static void main(String[] args) {
        //1. Specify the stream-processing application configuration
        Properties properties = new Properties();
        properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "Kafka01:9092,Kafka02:9092,Kafka03:9092");
        properties.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        properties.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        properties.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount-highlevel-application");
        properties.put(StreamsConfig.NUM_STREAM_THREADS_CONFIG, 3);

        //2. Build the processing topology
        StreamsBuilder sb = new StreamsBuilder();
        KStream<String, String> stream = sb.stream("t10");
        KTable<String, Long> kTable = stream
                // null hello
                // null world
                .flatMap((key, value) -> {
                    String[] words = value.toLowerCase().split(" ");
                    ArrayList<KeyValue<String, String>> list = new ArrayList<>();
                    for (String word : words) {
                        KeyValue<String, String> keyValue = new KeyValue<>(key, word);
                        list.add(keyValue);
                    }
                    return list;

                })
                .map((k, v) -> new KeyValue<String, Long>(v, 1L))
                .groupByKey(Grouped.with(Serdes.String(), Serdes.Long()))
                .count();

        kTable.toStream().to("t11", Produced.with(Serdes.String(), Serdes.Long()));

        Topology topology = sb.build();

        // Print the automatically generated topology
        System.out.println(topology.describe().toString());

        //3. Initialize the streams application
        KafkaStreams kafkaStreams = new KafkaStreams(topology, properties);
        //4. Start the streams application
        kafkaStreams.start();
    }
}
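
The application above runs until the JVM exits. A common addition is a shutdown hook that closes the streams instance cleanly, placed right after kafkaStreams.start():

// close the streams application cleanly when the JVM shuts down
Runtime.getRuntime().addShutdownHook(new Thread(kafkaStreams::close));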

Operators

Stateless operators

Stateless operators transform records without maintaining or querying any state.

  • Branch

    KStream ----> KStream[]

    KStream<String, String>[] kStreams = stream.branch(
                    (k, v) -> v.startsWith("A"),   // stream of values starting with "A"
                    (k, v) -> true                 // everything else
            );
    kStreams[0].foreach((k,v) -> System.out.println(k + "\t"+v));
    
  • Filter

    KStream → KStream

    Keeps the records for which the boolean predicate returns true

    stream
         .filter((k,v) -> v.startsWith("H"))
        .foreach((k,v) -> System.out.println(k+"\t"+v));
    
  • filterNot

    KStream → KStream

    KTable → KTable

    Keeps the records for which the boolean predicate returns false

    stream
        .filterNot((k,v) -> v.startsWith("H"))
        .foreach((k,v) -> System.out.println(k+"\t"+v));
    
  • FlatMap

    KStream → KStream

    Expands one record into 0..n records

    stream
         .flatMap((k,v) -> Arrays.asList(
              new KeyValue<String,String>(k,v.toUpperCase()+"!"),
              new KeyValue<String,String>(k,v.toLowerCase()+"?")))
        .foreach((k,v) -> System.out.println(k +"\t" + v));
    
  • flatMapValues

    KStream → KStream

    Expands the value of one record into 1..n new values (the key stays the same)

    stream
                    // null Hello World
                    //--------------------
                    // null Hello
                    // null World
        .flatMapValues((v) -> Arrays.asList(v.split(" ")))
        .foreach((k, v) -> System.out.println(k + "\t" + v));
    
  • Foreach

    KStream → void (terminal operation)

    Iterates over the records in the KStream; returns nothing

    stream
                    // null Hello World
                    //--------------------
                    // null Hello
                    // null World
        .flatMapValues((v) -> Arrays.asList(v.split(" ")))
        .foreach((k, v) -> System.out.println(k + "\t" + v));
    
  • GroupBy

    KStream → KGroupedStream

    Groups the stream by the supplied key; note that grouping triggers a shuffle (re-partitioning)

    //============================groupBy===================================
    stream
                    // null Hello World
                    //--------------------
                    // null Hello
                    // null World
         .flatMapValues((v) -> Arrays.asList(v.split(" ")))
        .groupBy((k,v) -> v)
        .count()
        .toStream()
        .foreach((k,v) -> System.out.println(k+"\t"+v));
    //======================================================================
    
  • GroupByKey

    KStream → KGroupedStream

    Groups the stream by its existing key (shuffle)

    stream
                    // null Hello World
                    //--------------------
                    // null Hello
                    // null World
        .flatMapValues((v) -> Arrays.asList(v.split(" ")))
        .map((k,v) -> new KeyValue<String,Long>(v,1L))
        .groupByKey(Grouped.with(Serdes.String(),Serdes.Long()))
        .count()
        .toStream()
        .foreach((k,v) -> System.out.println(k+"\t"+v));
    
  • Map

    KStream → KStream

    Maps each record of the stream to a new record (both key and value may change), as in the sketch below
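
    A minimal sketch, reusing the word-splitting stream from the examples above:

    stream
        .flatMapValues((v) -> Arrays.asList(v.split(" ")))
        // map each word to the pair (word, 1L): both key and value are replaced
        .map((k,v) -> new KeyValue<String,Long>(v,1L))
        .foreach((k,v) -> System.out.println(k+"\t"+v));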

  • mapValues

    Similar to map, except that the key stays the same and only the value may change

    stream
        // null Hello World
        //--------------------
        // null Hello
        // null World
        .flatMapValues((v) -> Arrays.asList(v.split(" ")))
        .map((k,v) -> new KeyValue<>(v,1L))
        .mapValues(v -> v + 1)
        .foreach((k,v) -> System.out.println(k+"\t"+v));
    
  • Merge

    KStream → KStream

    Merges two streams into a single stream

    KStream<String, String>[] streams = stream
                    .branch(
                            (k, v) -> v.startsWith("A"),
                            (k, v) -> v.startsWith("B"),
                            (k, v) -> true
                    );
            streams[0].merge(streams[2])
                    .foreach((k,v) -> System.out.println(k+"\t"+v));
    
  • Peek

    KStream → KStream

    A probe for debugging: it does not modify the stream's contents

    stream.peek((k,v) -> System.out.println(k+"\t"+v));
    
  • Print

    Equivalent to foreach((key, value) -> System.out.println(key + ", " + value))

    stream.print(Printed.toSysOut());
    
  • SelectKey

    KStream → KStream

    Assigns a new key to each record (the key changes, the value does not)

    stream.selectKey((k,v) -> "Hello:").print(Printed.toSysOut());
    
  • Table to Stream

    KTable → KStream

    table.toStream();
    
Stateful operators
  • aggregate

    KGroupedStream --> KTable

    Rolling aggregation: aggregates the values for each grouping key

    stream
                    // null Hello Hello
         .flatMapValues(v -> Arrays.asList(v.split(" ")))
        // null Hello
        // null Hello
        .groupBy((k,v) -> v,Grouped.with(Serdes.String(),Serdes.String()))
        // Hello [Hello,Hello].length
        // Hello 2+0
        // arg 1: initializer; arg 2: aggregator (k: the word, aggs: running count)
        .aggregate(()->0L,(k,v,aggs) -> aggs + 1L,Materialized.with(Serdes.String(),Serdes.Long()))
        .toStream()
        .print(Printed.toSysOut());
    
  • count

    KGroupedStream → KTable

    Rolling aggregation: counts the number of values for each grouping key

    stream
                    // null Hello Hello
         .flatMapValues(v -> Arrays.asList(v.split(" ")))
        // null Hello
        // null Hello
        .groupBy((k,v) -> v,Grouped.with(Serdes.String(),Serdes.String()))
        .count()
        .toStream()
        .print(Printed.toSysOut());
    
  • Reduce

    KGroupedStream → KTable

    Rolling aggregation: combines the values for each grouping key

     stream
                     // null Hello Hello
         .flatMapValues(v -> Arrays.asList(v.split(" ")))
         // null Hello
         // null Hello
         .map((k,v) -> new KeyValue<String,Long>(v,1L))
         .groupByKey(Grouped.with(Serdes.String(),Serdes.Long()))
         // Hello [1,1,1]
         // World [1,1,1,1]
         // reducer: combines two values that share the same key
         .reduce((v1,v2) -> {
             System.out.println(v1 +"\t"+v2);
             Long result = v1+v2;
             return result;
         },Materialized.with(Serdes.String(),Serdes.Long()))
         .toStream()
         .print(Printed.toSysOut());
    

Window

Tumbling

public class KafkaStreamingWordCountWithWindow {
    public static void main(String[] args) {
        Properties properties = new Properties();
        properties.put(StreamsConfig.APPLICATION_ID_CONFIG, "wordcount22");
        properties.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "kafka01:9092");
        properties.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        properties.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());

        StreamsBuilder builder = new StreamsBuilder();
        // source topic of the stream
        KStream<String, String> kStream = builder.stream("input");

        kStream
                .flatMap((k, v) -> {
                    ArrayList<KeyValue<String, String>> keyValues = new ArrayList<>();
                    String[] words = v.split(" ");
                    for (String word : words) {
                        keyValues.add(new KeyValue<String, String>(k, word));
                    }
                    return keyValues;
                })
                .map((k, v) -> new KeyValue<String, Long>(v, 1L))
                .groupBy((k, v) -> k, Grouped.with(Serdes.String(), Serdes.Long()))
                // size of each tumbling window
                .windowedBy(TimeWindows.of(Duration.ofSeconds(10)))
                .reduce((value1, value2) -> value1 + value2, Materialized.<String, Long, WindowStore<Bytes, byte[]>>as("counts").withKeySerde(Serdes.String()).withValueSerde(Serdes.Long()))
                .toStream()
                .peek(((Windowed<String> key, Long value) -> {
                    Window window = key.window();
                    SimpleDateFormat sdf = new SimpleDateFormat("HH:mm:ss");
                    long start = window.start();
                    long end = window.end();
                    System.out.println(sdf.format(start) + " ~ " + sdf.format(end) + "\t" + key.key() + "\t" + value);
                }));

        // Build and start the Kafka Streams application
        KafkaStreams kafkaStreams = new KafkaStreams(builder.build(), properties);
        kafkaStreams.start();
    }
}

Hopping

Duration windowSizeMs = Duration.ofMinutes(5);
Duration advanceMs =    Duration.ofMinutes(1);
TimeWindows.of(windowSizeMs).advanceBy(advanceMs);

Sliding

Session

SessionWindows.with(Duration.ofMinutes(5));
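
A fragment sketch showing how the grouped word stream from the tumbling-window example could be aggregated with session windows instead (count() is used here so no explicit state-store serdes are needed):

        .groupBy((k, v) -> k, Grouped.with(Serdes.String(), Serdes.Long()))
        // a session closes after 5 minutes without new records for a key
        .windowedBy(SessionWindows.with(Duration.ofMinutes(5)))
        .count()
        .toStream()
        .foreach((windowedKey, count) -> System.out.println(windowedKey.key() + "\t" + count));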