Kafka Streams (2)

Pass data from one Kafka topic (topic1) straight through to another (topic2).
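
In its simplest form this is a one-line topology. A minimal sketch, assuming hypothetical topics topic1 and topic2 already exist and String serdes are configured; the full configuration and lifecycle boilerplate is shown in Example 1 below:

StreamsBuilder builder = new StreamsBuilder();
// read every record from topic1 and forward it unchanged to topic2
builder.stream("topic1").to("topic2");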

Example 1: flatten event_attendees_row into one record per (event, user, status)

package kafka;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.*;
import org.apache.kafka.streams.kstream.KStream;

import java.util.ArrayList;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;

public class EventAttendStream {
    public static void main(String[] args) {
        Properties prop = new Properties();
        prop.put(StreamsConfig.APPLICATION_ID_CONFIG, "eventattend"); // one application id per consumer group
        prop.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.232.211:9092");
        prop.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        prop.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        prop.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 300); // commit interval in ms
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); // disable auto commit
        // earliest / latest / none
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

        StreamsBuilder builder = new StreamsBuilder();
        KStream<Object, Object> source = builder.stream("event_attendees_row");
        source.flatMap((key, value) -> {
            // value format: event,yes,maybe,invited,no
            // columns 1..4 each hold a space-separated list of user ids
            String[] fields = value.toString().split(",");
            String[] statuses = {"yes", "maybe", "invited", "no"};
            ArrayList<KeyValue<String, String>> list = new ArrayList<>();
            for (int i = 0; i < statuses.length; i++) {
                int col = i + 1;
                if (fields.length > col && fields[col].trim().length() > 0) {
                    for (String user : fields[col].split(" ")) {
                        String record = fields[0] + "," + user + "," + statuses[i];
                        System.out.println(record);
                        list.add(new KeyValue<>(null, record));
                    }
                }
            }
            return list;
        }).to("event_attendees_1");
        Topology topo = builder.build();
        KafkaStreams streams = new KafkaStreams(topo,prop);
        CountDownLatch latch = new CountDownLatch(1);
        Runtime.getRuntime().addShutdownHook(new Thread("stream"){
            public void run(){
                streams.close();
                latch.countDown();
            }
        });
        streams.start();
        try {
            latch.await();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }

    }
}

Example 2: flatten user_friends into one record per (user, friend) pair

package kafka;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.*;
import org.apache.kafka.streams.kstream.KStream;

import java.util.ArrayList;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;


public class UserFriendStream {
    public static void main(String[] args) {
        Properties prop = new Properties();
        prop.put(StreamsConfig.APPLICATION_ID_CONFIG, "userfriend1"); // one application id per consumer group
        prop.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.232.211:9092");
        prop.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        prop.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        prop.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 300); // commit interval in ms
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); // disable auto commit
        // earliest / latest / none
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

        StreamsBuilder builder = new StreamsBuilder();
        // input format: user,friends e.g. 3238005,47949549 68056805
        KStream<Object, Object> source = builder.stream("user_friends");
        KStream<String, String> userFriends = source.flatMap((key, value) -> {
            ArrayList<KeyValue<String, String>> list = new ArrayList<>();
            String[] fields = value.toString().split(",");
            if (fields.length == 2) {
                String[] friends = fields[1].split("\\s+");
                String user = fields[0];
                if (user.trim().length() > 0) {
                    for (String friend : friends) {
                        System.out.println(user + "," + friend);
                        KeyValue<String, String> keyValue = new KeyValue<>(null, user + "," + friend);
                        list.add(keyValue);
                    }

                }
            }
            return list;
        });
        userFriends.to("userfriends");
        Topology topo = builder.build();
        KafkaStreams streams = new KafkaStreams(topo,prop);
        CountDownLatch latch = new CountDownLatch(1);
        Runtime.getRuntime().addShutdownHook(new Thread("stream"){
            public void run(){
                streams.close();
                latch.countDown();
            }
        });
        streams.start();
        try {
            latch.await();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }

    }
}

Kafka Streams Windows

Streaming data is unbounded in time, whereas aggregations can only operate on a specific, i.e. bounded, data set. Some mechanism is therefore needed to select a bounded data set out of the unbounded stream according to well-defined semantics. Windows are a very common way to set such computation boundaries. Different stream-processing systems support similar, but not identical, window types.

Hopping Time Window

A hopping time window has two properties: window size and advance interval. The window size specifies how large the window is, i.e. how much data each computation covers, while the advance interval defines how often a new window starts. The application's start time is the start of the first window; after each advance interval a new window is created, and every window spans size in time, so windows overlap whenever the advance interval is smaller than the size. A sketch follows.
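
In the Kafka Streams DSL (the same pre-2.8 long-millisecond API the SessionDemo at the end of this section uses), a hopping window is declared with TimeWindows plus advanceBy. A minimal count sketch, where grouped is a hypothetical KGroupedStream<String, String> built as in that demo:

// "grouped" is assumed: a KGroupedStream<String, String> as in SessionDemo below.
// 15-second windows starting every 5 seconds, so each record lands
// in 3 overlapping windows.
KTable<Windowed<String>, Long> hoppingCounts = grouped
        .windowedBy(TimeWindows.of(Duration.ofSeconds(15).toMillis())
                .advanceBy(Duration.ofSeconds(5).toMillis()))
        .count();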

Tumbling Time Window

A tumbling time window is a special case of the hopping time window: when a hopping window's size equals its advance interval, it becomes a tumbling window.
A tumbling window therefore has a single parameter, size; the end of one window is the start of the next, so windows neither overlap nor leave gaps. See the sketch below.
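
The same sketch with size equal to the advance interval only needs TimeWindows.of (again assuming the hypothetical grouped stream from above):

// back-to-back 15-second windows: no overlap, no gaps
KTable<Windowed<String>, Long> tumblingCounts = grouped
        .windowedBy(TimeWindows.of(Duration.ofSeconds(15).toMillis()))
        .count();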

Session Window

This window type is used for aggregations after grouping by key. Records are grouped by key, and the start and end of each window within a group are defined by the business logic. A typical case: use a session window to measure how long a user stays on a website. For a given user (the key), the window opens on a login event and closes on a logout event or a timeout; when it closes, the user's visit duration, click count and so on can be computed. A minimal sketch follows; the SessionDemo at the end of this section is a complete example.
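
For symmetry with the other window types, a session window with a 15-second inactivity gap looks like this (same hypothetical grouped stream; the full SessionDemo below does exactly this):

// a session for a key closes after 15 seconds without a new record
KTable<Windowed<String>, Long> sessionCounts = grouped
        .windowedBy(SessionWindows.with(Duration.ofSeconds(15).toMillis()))
        .count();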

Sliding Window

This window type is used only when joining two KStreams. Its size defines the maximum timestamp difference within which records from the two sides are considered to fall into the same window and are therefore eligible to be joined. With a window size of 5 seconds, records from the two KStreams whose timestamps differ by less than 5 seconds are treated as being in the same window and are joined, as in the sketch below.
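
A hedged sketch of such a join, assuming two co-partitioned KStream<String, String> instances left and right over hypothetical topics:

// records whose timestamps differ by less than 5 seconds are joined
KStream<String, String> joined = left.join(
        right,
        (leftValue, rightValue) -> leftValue + "|" + rightValue, // combine the two matching values
        JoinWindows.of(Duration.ofSeconds(5).toMillis()));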

package kafka;

import org.apache.kafka.clients.consumer.ConsumerConfig;
import org.apache.kafka.common.serialization.Serdes;
import org.apache.kafka.streams.*;
import org.apache.kafka.streams.kstream.*;

import java.time.Duration;
import java.util.ArrayList;
import java.util.Properties;
import java.util.concurrent.CountDownLatch;

public class SessionDemo {
    public static void main(String[] args) {
        Properties prop = new Properties();
        prop.put(StreamsConfig.APPLICATION_ID_CONFIG, "windowdemo3"); // one application id per consumer group
        prop.put(StreamsConfig.BOOTSTRAP_SERVERS_CONFIG, "192.168.232.211:9092");
        prop.put(StreamsConfig.DEFAULT_KEY_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        prop.put(StreamsConfig.DEFAULT_VALUE_SERDE_CLASS_CONFIG, Serdes.String().getClass());
        prop.put(StreamsConfig.COMMIT_INTERVAL_MS_CONFIG, 300); // commit interval in ms
        prop.put(ConsumerConfig.ENABLE_AUTO_COMMIT_CONFIG, "false"); // disable auto commit
        // earliest / latest / none
        prop.put(ConsumerConfig.AUTO_OFFSET_RESET_CONFIG, "earliest");

        StreamsBuilder builder = new StreamsBuilder();
        SessionWindowedKStream<String, String> windowdemo1 = builder.stream("windowdemo")
                .flatMap((key, value) -> {
                    // emit one record per whitespace-separated word in the line
                    ArrayList<KeyValue<String, String>> keyValues = new ArrayList<>();
                    for (String word : value.toString().split("\\s+")) {
                        keyValues.add(new KeyValue<>(null, word));
                    }
                    return keyValues;
                })
                .map((key, value) -> new KeyValue<String, String>(value, "1")) // the word becomes the key
                .groupByKey()
//                .windowedBy(TimeWindows.of(Duration.ofSeconds(15).toMillis())) // tumbling
//                .windowedBy(TimeWindows.of(Duration.ofSeconds(15).toMillis())
//                        .advanceBy(Duration.ofSeconds(5).toMillis())) // hopping
                .windowedBy(SessionWindows.with(Duration.ofSeconds(15).toMillis())); // session window

        KStream<Windowed<String>, Long> windowedLongKStream = windowdemo1.count().toStream();
        windowedLongKStream.foreach((key, value) -> System.out.println("key:" + key + " value:" + value));

        Topology topo = builder.build();
        KafkaStreams streams = new KafkaStreams(topo, prop);
        CountDownLatch latch = new CountDownLatch(1);
        Runtime.getRuntime().addShutdownHook(new Thread("stream") {
            public void run() {
                streams.close();
                latch.countDown();
            }
        });
        streams.start();
        try {
            latch.await();
        } catch (InterruptedException e) {
            e.printStackTrace();
        }
    }
}



