Join
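/**
 * Joins two data streams.
 *
 * For two streams to be joinable, both of the following conditions must hold:
 * 1. Because the data is spread across multiple machines, records with the same join key
 *    must be sent over the network to the same partition on the same machine
 *    (i.e. both streams are keyed by the join condition with keyBy).
 * 2. The records of each stream must be held back so they can wait for the other stream
 *    (i.e. both streams are divided into windows of the same type and the same length).
 */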
public class EventTumblingWindowJoin {
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
//1000,o001,c001
DataStreamSource<String> lines1 = env.socketTextStream("linux01", 7777);
//1200,c001,图书
DataStreamSource<String> lines2 = env.socketTextStream("linux01", 8888);
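// Join on event time, using the newer WatermarkStrategy API to extract the event time and generate watermarks.
// NOTE(review): the original note gives the window length as "5000 seconds"; the sample timestamps (1000, 1200)
// suggest 5000 milliseconds was meant -- confirm against the window definition.
// Assign event timestamps and watermarks to both streams (allowed out-of-orderness is zero).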
SingleOutputStreamOperator<String> lines1WithWatermark
= lines1.assignTimestampsAndWatermarks(WatermarkStrategy.<String>forBoundedOutOfOrderness(Duration.ofSeconds(0)).withTimestampAssigner(new SerializableTimestampAssigner<String>() {
@Override
public long extractTimestamp(String element, long recordTimestamp) {
    // The text before the first comma of the input line is parsed as the event timestamp;
    // the recordTimestamp argument is not consulted.
    final String[] columns = element.split(",");
    final String leadingColumn = columns[0];
    return Long.parseLong(leadingColumn);
}
}));
SingleOutputStreamOperator<String> lines2WithWatermark
= lines2.assignTimestampsAndWatermarks(WatermarkStrategy.<String>forBoundedOutOfOrderness(Duration.ofSeconds(0)).withTimestampAssigner(new SerializableTimestampAssigner<String>() {
@Override
public long extractTimestamp(String element, long recordTimestamp) {
    // The text before the first comma of the input line is parsed as the event timestamp;
    // the recordTimestamp argument is not consulted.
    final String[] columns = element.split(",");
    final String leadingColumn = columns[0];
    return Long.parseLong(leadingColumn);
}
}));
// Parse the raw text lines of both streams into (timestamp, field1, field2) tuples
SingleOutputStreamOperator<Tuple3<Long, String, String>> tpStream1
= lines1WithWatermark.map(new MapFunction<String, Tuple3<Long, String, String>>() {
@Override
public Tuple3<Long, String, String> map(String input) throws Exception {
    // Split the comma-separated line: the first column is parsed as a long,
    // the second and third columns are carried through unchanged as strings.
    final String[] columns = input.split(",");
    final long parsedFirst = Long.parseLong(columns[0]);
    final String second = columns[1];
    final String third = columns[2];
    return Tuple3.of(parsedFirst, second, third);
}
});
SingleOutputStreamOperator<Tuple3<Long, String, String>> tpStream2
= lines2WithWatermark.map(new MapFunction<String, Tuple3<Long, String, String>>() {
@Override
public Tuple3<Long, String, String> map(String input) throws Exception {