6.5 侧输出流(sideOutput)
6.5.1 功能1:处理允许迟到后迟到的数据
即使设置了允许迟到时间,窗口最终仍然会真正关闭。如果超过允许迟到时间之后,仍然有迟到的数据到达,该怎么办?Flink提供了侧输出流(side output)机制,用来处理窗口关闭之后才到达的数据。
/**
 * Demonstrates collecting records that arrive after a window has been closed
 * (i.e. later than watermark delay + allowed lateness) through a side output.
 *
 * <p>Pipeline: socket text lines of the form {@code id,ts,value} are parsed into
 * {@code WaterSensor} objects, event time is taken from {@code getTs() * 1000}
 * (so {@code ts} is presumably expressed in seconds), and the stream is keyed by
 * sensor id into 10-second tumbling event-time windows with 2 seconds of allowed
 * lateness. Records rejected by the window operator as too late are tagged
 * {@code "late"} and printed separately from the regular window results.
 */
public class Flink19_Watermark_SideOutput {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Parallelism 1 keeps the printed output of this demo in a single task.
        env.setParallelism(1);

        SingleOutputStreamOperator<WaterSensor> sensorDS = env
                .socketTextStream("hadoop102", 9999)
                .map(new MapFunction<String, WaterSensor>() {
                    @Override
                    public WaterSensor map(String value) throws Exception {
                        // Expected line layout: id,ts,value
                        String[] dats = value.split(",");
                        return new WaterSensor(
                                dats[0],
                                Long.valueOf(dats[1]),
                                Integer.valueOf(dats[2])
                        );
                    }
                })
                .assignTimestampsAndWatermarks(
                        WatermarkStrategy
                                // Watermark lags the max seen timestamp by 3 seconds.
                                .<WaterSensor>forBoundedOutOfOrderness(Duration.ofSeconds(3))
                                .withTimestampAssigner(new SerializableTimestampAssigner<WaterSensor>() {
                                    @Override
                                    public long extractTimestamp(WaterSensor element, long recordTimestamp) {
                                        // Event timestamps must be in milliseconds.
                                        return element.getTs() * 1000L;
                                    }
                                })
                );

        // Define the side-output tag. The anonymous subclass ({}) preserves the
        // generic element type so Flink can derive the type information.
        OutputTag<WaterSensor> outputTag = new OutputTag<WaterSensor>("late"){};

        SingleOutputStreamOperator<String> resultDS = sensorDS
                .keyBy(sensor -> sensor.getId())
                .window(TumblingEventTimeWindows.of(Time.seconds(10)))
                .allowedLateness(Time.seconds(2))
                // Anything arriving after the allowed lateness goes to the side output.
                .sideOutputLateData(outputTag)
                .process(new ProcessWindowFunction<WaterSensor, String, String, TimeWindow>() {
                    @Override
                    public void process(String s, Context context, Iterable<WaterSensor> elements, Collector<String> out) throws Exception {
                        // Count by iterating: Spliterator.estimateSize() is only an estimate
                        // and yields Long.MAX_VALUE when the Iterable does not expose its size.
                        long count = 0L;
                        for (WaterSensor ignored : elements) {
                            count++;
                        }
                        out.collect(
                                "key=" + s + "\n" +
                                        "数据为:" + elements + "\n" +
                                        "数量条数:" + count + "\n" +
                                        "窗口为:[" + context.window().getStart() + "," + context.window().getEnd() + ")\n" +
                                        "=======================================================================\n\n"
                        );
                    }
                });

        resultDS.print("result");

        // Fetch the side output that carries the late records.
        DataStream<WaterSensor> sideOutput = resultDS.getSideOutput(outputTag);
        sideOutput.print("late");

        env.execute();
    }
}
6.5.2 主流正常执行,侧流实现额外功能
主流正常输出WaterSensor数据;当水位值大于5时,通过侧输出流输出告警信息。
/**
 * Demonstrates using a side output for an additional concern while the main
 * stream keeps flowing unchanged.
 *
 * <p>Pipeline: socket text lines of the form {@code id,ts,vc} are parsed into
 * {@code WaterSensor} objects and keyed by sensor id. Every record is forwarded
 * on the main stream as its {@code toString()} form; in addition, whenever the
 * water level ({@code getVc()}) is strictly greater than 5, an alarm message is
 * emitted to the side output tagged {@code "high"}.
 */
public class Flink25_Process_SideOutput {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Parallelism 1 keeps the printed output of this demo in a single task.
        env.setParallelism(1);

        // Side outputs are matched by the OutputTag's id (and type), so one tag
        // instance is defined once and shared by the producer and the consumer
        // instead of being re-created for every alarming element.
        final OutputTag<String> outputTag = new OutputTag<String>("high"){};

        SingleOutputStreamOperator<String> resultDS = env
                .socketTextStream("hadoop102", 9999)
                .map(new MapFunction<String, WaterSensor>() {
                    @Override
                    public WaterSensor map(String value) throws Exception {
                        // Expected line layout: id,ts,vc
                        String[] dats = value.split(",");
                        return new WaterSensor(
                                dats[0],
                                Long.valueOf(dats[1]),
                                Integer.valueOf(dats[2])
                        );
                    }
                })
                .assignTimestampsAndWatermarks(WatermarkStrategy
                        // Watermark lags the max seen timestamp by 3 seconds.
                        .<WaterSensor>forBoundedOutOfOrderness(Duration.ofSeconds(3))
                        .withTimestampAssigner(new SerializableTimestampAssigner<WaterSensor>() {
                            @Override
                            public long extractTimestamp(WaterSensor element, long recordTimestamp) {
                                // Event timestamps must be in milliseconds.
                                return element.getTs() * 1000L;
                            }
                        })
                )
                .keyBy(sensor -> sensor.getId())
                .process(new KeyedProcessFunction<String, WaterSensor, String>() {
                    @Override
                    public void processElement(WaterSensor value, Context ctx, Collector<String> out) throws Exception {
                        // Alarm only when the level is strictly above 5, matching the
                        // wording of the alarm message itself.
                        if (value.getVc() > 5) {
                            ctx.output(outputTag, ctx.getCurrentKey() + "监测到水位值高于5!!!");
                        }
                        // The main stream always receives the record, alarm or not.
                        out.collect(value.toString());
                    }
                });

        resultDS.print("result");
        resultDS.getSideOutput(outputTag).print("alarm");

        env.execute();
    }
}