环境准备
// Configure the REST port used by the local Flink Web UI
Configuration configuration=new Configuration();
configuration.setInteger(RestOptions.PORT,8848);
// Create a local Flink execution environment with the Web UI enabled
StreamExecutionEnvironment environment = StreamExecutionEnvironment.createLocalEnvironmentWithWebUI(configuration);
// Set the default parallelism
environment.setParallelism(4);
// Use event time as the time characteristic
environment.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
// TODO: 2022/2/8 Interval Join: joins two KeyedStreams; a time range and the join key must be specified, and the result is a DataStream.
// TODO: 2022/2/8 Note: before running, assignTimestampsAndWatermarks() must be applied to both streams to obtain event timestamps and watermarks.
// Interval join only supports event time.
// It can only join two streams.
// A time interval is defined on the right stream and the left stream is joined against it.
DataStream<Times> timesSource = environment.addSource(new TimeDB());
// Assign event timestamps and watermarks; the timestamp is taken from Times.getTimes().
DataStream<Times> timeStream = timesSource
.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<Times>(Time.seconds(0)) {
@Override
public long extractTimestamp(Times times) {
return times.getTimes();
}
});
// Left side of the join, keyed by the "tid" field.
KeyedStream<Times, Tuple> tid = timeStream.keyBy("tid");
// Assign event timestamps and watermarks; the timestamp is taken from tuple field f1.
DataStream<Tuple2<Integer, Long>> operator = environment.addSource(new StudentDB())
.assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<Tuple2<Integer, Long>>(Time.seconds(0)) {
@Override
public long extractTimestamp(Tuple2<Integer, Long> integerLongTuple2) {
return integerLongTuple2.f1;
}
});
// Right side of the join, keyed by tuple position 0.
KeyedStream<Tuple2<Integer, Long>, Tuple> keyedStream = operator.keyBy(0);
DataStream<Tuple4<Integer, Double, Long, Long>> intervalstream = tid.intervalJoin(keyedStream)
.between(Time.milliseconds(-2000), Time.milliseconds(1000))
// Makes the upper bound exclusive.
// NOTE(review): the original comment said the upper bound is included, which contradicts this call — confirm which is intended.
.upperBoundExclusive()
// Makes the lower bound exclusive.
// NOTE(review): the original comment said the lower bound is included, which contradicts this call — confirm which is intended.
.lowerBoundExclusive()
// Merge the fields of both streams into the required output tuple:
// (right f0, left getTem(), right f1, left getTimes())
.process(new ProcessJoinFunction<Times, Tuple2<Integer, Long>, Tuple4<Integer, Double, Long, Long>>() {
@Override
public void processElement(Times times, Tuple2<Integer, Long> ms, Context context, Collector<Tuple4<Integer, Double, Long, Long>> out) throws Exception {
out.collect(new Tuple4<Integer, Double, Long, Long>(ms.f0, times.getTem(), ms.f1, times.getTimes()));
}
});
intervalstream.print("intervalstream");
WindowCoGroup 两个DataStream在相同时间窗口上应用CoGroup运算,输出结果为DataStream,CoGroup和Join功能类似,但是更加灵活。
// Co-group both keyed streams on their id inside 3-second tumbling processing-time windows
// and emit one comma-separated line per key and window.
DataStream<String> coGroup = tid.coGroup(keyedStream)
        .where(left -> left.getTid())
        .equalTo(right -> right.f0)
        .window(TumblingProcessingTimeWindows.of(Time.seconds(3)))
        .apply(new CoGroupFunction<Times, Tuple2<Integer, Long>, String>() {
            /**
             * Concatenates all elements of one key/window from both inputs into a single string.
             *
             * @param first  elements of the first input for this key and window
             * @param second elements of the second input for this key and window
             * @param out    collector receiving the concatenated line
             */
            @Override
            public void coGroup(Iterable<Times> first, Iterable<Tuple2<Integer, Long>> second, Collector<String> out) throws Exception {
                StringBuilder line = new StringBuilder();
                // Each first-input element contributes "tid,tem," (with a trailing comma).
                for (Times reading : first) {
                    line.append(reading.getTid()).append(",").append(reading.getTem()).append(",");
                }
                // Each second-input element contributes "f0,f1" with no separator after it.
                for (Tuple2<Integer, Long> pair : second) {
                    line.append(pair.f0).append(",").append(pair.f1);
                }
                out.collect(line.toString());
            }
        });
coGroup.print("cogroup");
Connect 连接(connect)两个DataStream输入流,并且保留其类型,输出流为ConnectedStreams。两个数据流之间可以共享状态。
// Stream 1, keyed by id ("tid")
// NOTE(review): timesSource is also declared in the interval-join snippet earlier in this file;
// if both snippets are pasted into one method this redeclaration will not compile.
DataStream<Times> timesSource = environment.addSource(new TimeDB());
KeyedStream<Times, Tuple> tstream = timesSource.keyBy("tid");
// Stream 2: map each (Integer, Long) tuple to a Student, then key by id ("sid")
DataStream<Student> studentsource = environment.addSource(new StudentDB()).map(new MapFunction<Tuple2<Integer, Long>, Student>() {
@Override
public Student map(Tuple2<Integer, Long> mos) throws Exception {
return new Student(mos.f0, mos.f1);
}
});
KeyedStream<Student, Tuple> studentstream = studentsource.keyBy("sid");
// Define two side-output tags, one per input type
OutputTag<Times> times = new OutputTag<Times>("times"){};
OutputTag<Student> student = new OutputTag<Student>("student"){};
// TODO: 2022/2/9 Connect: connects two DataStreams while keeping their types; the result is a
// ConnectedStreams, and the two inputs can share keyed state.
//
// Pairs each Times element with the Student element of the same key. Whichever side arrives
// first is buffered in keyed state together with an event-time timeout; if the counterpart does
// not arrive before the timeout fires, the buffered element is emitted to its side output.
// NOTE(review): event-time timers only fire as watermarks advance — confirm that timestamps and
// watermarks are assigned upstream of this operator.
DataStream<Tuple4<Integer, Double, Long, Long>> connect = tstream.connect(studentstream)
        .process(new CoProcessFunction<Times, Student, Tuple4<Integer, Double, Long, Long>>() {
            /** How long a buffered element waits for its counterpart, in milliseconds. */
            private static final long MATCH_TIMEOUT_MS = 60000L;

            /** Buffered element of stream 1 that is still waiting for stream 2. */
            private transient ValueState<Times> timeState;
            /** Buffered element of stream 2 that is still waiting for stream 1. */
            private transient ValueState<Student> studentState;
            /** Timestamp of the currently registered timeout timer, kept so it can be deleted. */
            private transient ValueState<Long> dataState;

            @Override
            public void open(Configuration parameters) throws Exception {
                super.open(parameters);
                // Initialise the keyed state handles.
                timeState = getRuntimeContext().getState(new ValueStateDescriptor<>("timeState", Times.class));
                studentState = getRuntimeContext().getState(new ValueStateDescriptor<>("studentState", Student.class));
                dataState = getRuntimeContext().getState(new ValueStateDescriptor<>("dataState", Long.class));
            }

            /** Handles an element of stream 1. */
            @Override
            public void processElement1(Times times, Context ctx, Collector<Tuple4<Integer, Double, Long, Long>> out) throws Exception {
                Student pendingStudent = studentState.value();
                if (pendingStudent != null) {
                    // Stream 2 arrived first: emit the joined record and release its buffer and timer.
                    out.collect(Tuple4.of(times.getTid(), times.getTem(), times.getTimes(), pendingStudent.getStiems()));
                    studentState.clear();
                    cancelTimeout(ctx);
                } else {
                    // Stream 2 has not arrived yet: buffer this element and wait
                    // until its own event time + timeout.
                    timeState.update(times);
                    scheduleTimeout(ctx, times.getTimes());
                }
            }

            /** Handles an element of stream 2; mirrors the logic of stream 1. */
            @Override
            public void processElement2(Student tue, Context ctx, Collector<Tuple4<Integer, Double, Long, Long>> out) throws Exception {
                Times pendingTimes = timeState.value();
                if (pendingTimes != null) {
                    // Stream 1 arrived first: emit the joined record and release its buffer and timer.
                    out.collect(Tuple4.of(pendingTimes.getTid(), pendingTimes.getTem(), pendingTimes.getTimes(), tue.getStiems()));
                    timeState.clear();
                    cancelTimeout(ctx);
                } else {
                    // Stream 1 has not arrived yet: buffer this element and wait
                    // until its own event time + timeout.
                    studentState.update(tue);
                    scheduleTimeout(ctx, tue.getStiems());
                }
            }

            /** Fires when no counterpart arrived in time; routes the buffered element to its side output. */
            @Override
            public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple4<Integer, Double, Long, Long>> out) throws Exception {
                super.onTimer(timestamp, ctx, out);
                Times unmatchedTimes = timeState.value();
                if (unmatchedTimes != null) {
                    ctx.output(times, unmatchedTimes);
                }
                Student unmatchedStudent = studentState.value();
                if (unmatchedStudent != null) {
                    ctx.output(student, unmatchedStudent);
                }
                timeState.clear();
                studentState.clear();
                // The timer has fired, so its bookkeeping entry is stale as well.
                dataState.clear();
            }

            /** Registers the timeout timer for a buffered element, replacing any earlier timer for this key. */
            private void scheduleTimeout(Context ctx, long eventTime) throws Exception {
                Long previousDeadline = dataState.value();
                if (previousDeadline != null) {
                    ctx.timerService().deleteEventTimeTimer(previousDeadline);
                }
                long deadline = eventTime + MATCH_TIMEOUT_MS;
                dataState.update(deadline);
                ctx.timerService().registerEventTimeTimer(deadline);
            }

            /** Deletes the pending timeout timer, if any, and clears its bookkeeping state. */
            private void cancelTimeout(Context ctx) throws Exception {
                Long deadline = dataState.value();
                if (deadline != null) {
                    ctx.timerService().deleteEventTimeTimer(deadline);
                }
                dataState.clear();
            }
        });
connect.print("connect");
CoMap 和 CoFlatMap 在ConnectedStreams上应用Map和FlatMap运算,输出流为DataStream。其基本逻辑类似于在一般DataStream上的Map和FlatMap运算,区别在于CoMap转换有2个输入,Map转换有1个输入,CoFlatMap同理。
// Connect the two keyed streams; the result is shared by the CoMap and CoFlatMap examples below.
ConnectedStreams<Times, Student> connectedStreams = tstream.connect(studentstream);
CoMap
// Map both inputs of the connected stream to a message derived from the difference between
// the element's time field and date.getTime().
// NOTE(review): `date` is declared outside this snippet — confirm what reference time it holds.
DataStream<String> coMap = connectedStreams.map(new CoMapFunction<Times, Student, String>() {
    /** Builds the message for a time difference: a positive difference is reported with its size in ms. */
    private String describe(long diffMs) {
        return diffMs > 0 ? "有延迟" + diffMs + "ms" : "无延迟";
    }

    /** Handles elements of the first input (Times). */
    @Override
    public String map1(Times times) throws Exception {
        return describe(times.getTimes() - date.getTime());
    }

    /** Handles elements of the second input (Student). */
    @Override
    public String map2(Student student) throws Exception {
        return describe(student.getStiems() - date.getTime());
    }
});
coMap.print("coMap");
CoFlatMap
// Flat-map both inputs of the connected stream to their string representation,
// emitting exactly one string per incoming element.
DataStream<String> coflatMap = connectedStreams.flatMap(new CoFlatMapFunction<Times, Student, String>() {
    /** Emits the toString() of an element from the first input. */
    @Override
    public void flatMap1(Times reading, Collector<String> collector) throws Exception {
        collector.collect(reading.toString());
    }

    /** Emits the toString() of an element from the second input. */
    @Override
    public void flatMap2(Student pupil, Collector<String> collector) throws Exception {
        collector.collect(pupil.toString());
    }
});
coflatMap.print("coflatMap");
Split 将DataStream按照条件切分为多个DataStream,输出流为SplitStream。该方法已经标记为Deprecated废弃,推荐使用SideOutput。
// Tag every element with exactly one output name based on its getTem() value:
// "even" when it is greater than 20, otherwise "odd".
SplitStream<Times> splitStream = timesSource.split(new OutputSelector<Times>() {
    @Override
    public Iterable<String> select(Times times) {
        List<String> tags = new ArrayList<>();
        tags.add(times.getTem() > 20 ? "even" : "odd");
        return tags;
    }
});
Select Select与Split运算配合使用,在Split运算中切分的多个DataStream中,Select用来选择其中某一个具体的DataStream。
// Select each named output of the split stream and print it under its own label.
splitStream.select("even").print("split-even");
splitStream.select("odd").print("split-odd");
Iterate 在API层面上,对DataStream应用迭代会生成1个IterativeStream,然后在IterativeStream上应用业务处理逻辑,最终生成1个新的DataStream,IterativeStream本质上来说是一种中间数据流对象。
// Start an iteration on the keyed student stream.
IterativeStream<Student> iterativeStream = studentstream.iterate();
// Map every element of the iteration to whether its sid is greater than 5.
DataStream<Boolean> map = iterativeStream.map(new MapFunction<Student, Boolean>() {
@Override
public Boolean map(Student student) throws Exception {
return student.getSid() > 5;
}
});
// Elements with sid > 5 enter the feedback channel; per the original note, those below 5 are sent straight downstream.
// NOTE(review): the feedback elements are not modified before being fed back, so an element with sid > 5
// would pass this filter again on every round — confirm this is intended.
DataStream<Student> iterate = iterativeStream.filter(new FilterFunction<Student>() {
@Override
public boolean filter(Student student) throws Exception {
return student.getSid() > 5;
}
});
// Close the loop: feed the filtered stream back to the head of the iteration.
iterativeStream.closeWith(iterate);
// Print the data of the iteration (loop) stream.
iterativeStream.print();
// Keep only the `true` results of the map above, i.e. elements whose sid is greater than 5.
// NOTE(review): this contradicts the note above that elements below 5 go downstream — confirm which is intended.
DataStream<Boolean> filter = map.filter(new FilterFunction<Boolean>() {
@Override
public boolean filter(Boolean aBoolean) throws Exception {
return aBoolean;
}
});
// Print the downstream data.
filter.print();
Extract Timestamps 从记录中提取时间戳,并生成Watermark
// Assign event timestamps with a hand-written periodic watermark generator: the watermark is
// the largest timestamp seen so far minus a fixed delay.
SingleOutputStreamOperator<Times> timestampsAndWatermarks = timesSource.assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<Times>() {
    /** Largest event timestamp extracted so far. */
    private long maxTimestamp = 0L;
    /** Amount subtracted from the largest timestamp when building the watermark. */
    private final long delay = 0L;

    /** Called periodically by Flink to obtain the current watermark. */
    @Nullable
    @Override
    public Watermark getCurrentWatermark() {
        // Debug aid: print (maxTimestamp - delay) here to observe the emitted watermarks.
        return new Watermark(maxTimestamp - delay);
    }

    /**
     * Extracts the event timestamp of an element and tracks the largest one seen.
     *
     * @param times             the incoming element
     * @param previousTimestamp timestamp passed in by Flink for this element (unused)
     * @return the element's event timestamp, taken from getTimes()
     */
    @Override
    public long extractTimestamp(Times times, long previousTimestamp) {
        // Deliberately slow down processing; otherwise only a single watermark may be generated.
        try {
            Thread.sleep(1000);
        } catch (InterruptedException ex) {
            // Restore the interrupt flag so the task can still be cancelled promptly.
            Thread.currentThread().interrupt();
        }
        long eventTime = times.getTimes();
        // Debug aid: print eventTime here to observe the extracted timestamps.
        maxTimestamp = Math.max(eventTime, maxTimestamp);
        return eventTime;
    }
});
timestampsAndWatermarks.print("timestampsAndWatermarks");
Project 该类运算只适用于Tuple类型的DataStream,使用Project选取子Tuple,可以选择Tuple的部分元素,可以改变元素顺序,类似于SQL语句中的Select子句
// Keep tuple fields 1 and 0, in that order, from the StudentDB source.
DataStream<Tuple> project = environment.addSource(new StudentDB()).project(1,0);
project.print("project");
执行程序
// Submit the job defined above; nothing runs until execute() is called.
environment.execute();