侧流输出
需求: 将行为事件流进行分流。A事件分到一个流,B事件分到另一个流,其他事件保留在主流。
// Side-output tags, declared once and shared by the producing side (ctx.output) and the
// consuming side (getSideOutput) so both always agree on tag id and element type.
final OutputTag<EventLog> launchTag = new OutputTag<EventLog>("launch", TypeInformation.of(EventLog.class));
final OutputTag<String> backTag = new OutputTag<String>("back", TypeInformation.of(String.class));

SingleOutputStreamOperator<EventLog> processed = streamSource.process(new ProcessFunction<EventLog, EventLog>() {
    /**
     * Routes each event by its event id: "appLaunch" events go to the "launch" side output,
     * "putBack" events go to the "back" side output (serialized as JSON strings), and every
     * other event stays in the main stream.
     *
     * @param eventLog the input record
     * @param ctx the context, which provides the side-output facility
     * @param out the collector for the main stream
     * @throws Exception declared by the overridden method
     */
    @Override
    public void processElement(EventLog eventLog, ProcessFunction<EventLog, EventLog>.Context ctx, Collector<EventLog> out) throws Exception {
        String eventId = eventLog.getEventId();
        if ("appLaunch".equals(eventId)) {
            ctx.output(launchTag, eventLog);
        } else if ("putBack".equals(eventId)) {
            ctx.output(backTag, JSON.toJSONString(eventLog));
        } else {
            // Only events that were not diverted to a side output remain in the main stream.
            out.collect(eventLog);
        }
    }
});
// Fetch the "launch" side-output stream
DataStream<EventLog> launchStream = processed.getSideOutput(launchTag);
// Fetch the "back" side-output stream
DataStream<String> backStream = processed.getSideOutput(backTag);
launchStream.print("launch");
backStream.print("back");
processed.print("main");
双流 connect
// Connect the two String streams so that one operator can handle each side with its own logic.
ConnectedStreams<String, String> connectedStreams = stream1.connect(stream2);
SingleOutputStreamOperator<String> resultStream = connectedStreams.map(new CoMapFunction<String, String, String>() {
    // Field of the function instance, readable from both map1 and map2
    String outputPrefix = "prefix_";

    /**
     * Handles records of the left (first) stream: parses the value as an integer,
     * multiplies it by 10 and returns the prefixed result as a string.
     */
    @Override
    public String map1(String value) throws Exception {
        int scaled = Integer.parseInt(value) * 10;
        return outputPrefix + scaled;
    }

    /**
     * Handles records of the right (second) stream: returns the prefixed, upper-cased value.
     */
    @Override
    public String map2(String value) throws Exception {
        String upperCased = value.toUpperCase();
        return outputPrefix + upperCased;
    }
});
resultStream.print();
双流 union
参与 union 的流,数据类型必须一致
// Merge both String streams into a single stream.
DataStream<String> unioned = stream1.union(stream2);
// Prepend the fixed prefix to every record, then print the result.
DataStream<String> prefixed = unioned.map(record -> "prefix_" + record);
prefixed.print();
双流 cogroup
可实现两个流的数据进行窗口关联(支持 inner、left outer、right outer、full outer 等各种 join)
DataStream<String> resultStream = s1.coGroup(s2)
        .where(left -> left.f0)      // key of the left stream: field f0
        .equalTo(right -> right.f0)  // key of the right stream: field f0
        .window(TumblingProcessingTimeWindows.of(Time.seconds(10))) // 10-second tumbling processing-time windows
        .apply(new CoGroupFunction<Tuple2<String, String>, Tuple3<String, String, String>, String>() {
            /**
             * Implements a left outer join over one co-group.
             *
             * @param first the records of the first (left) stream in this co-group
             * @param second the records of the second (right) stream in this co-group
             * @param out the collector for the joined result lines
             * @throws Exception declared by the overridden method
             */
            @Override
            public void coGroup(Iterable<Tuple2<String, String>> first, Iterable<Tuple3<String, String, String>> second, Collector<String> out) throws Exception {
                // Left outer join: every left record is emitted at least once.
                boolean rightSideEmpty = !second.iterator().hasNext();
                for (Tuple2<String, String> leftRow : first) {
                    if (rightSideEmpty) {
                        // No right-side record in this group: emit the left fields padded with nulls.
                        out.collect(leftRow.f0 + "," + leftRow.f1 + "," + null + "," + null + "," + null);
                        continue;
                    }
                    for (Tuple3<String, String, String> rightRow : second) {
                        // Emit the concatenated fields of both sides.
                        out.collect(leftRow.f0 + "," + leftRow.f1 + "," + rightRow.f0 + "," + rightRow.f1 + "," + rightRow.f2);
                    }
                }
                // TODO implement right outer join
                // TODO implement full outer join
                // TODO implement inner join
            }
        });
resultStream.print();
双流 join
只能得到关联上的数据,即 inner join;其他类型的 join 需要使用 coGroup 实现。
DataStream<String> joinedStream = s1.join(s2)
        .where(left -> left.f0)
        .equalTo(right -> right.f0)
        .window(TumblingProcessingTimeWindows.of(Time.seconds(20)))
        .apply(new JoinFunction<Tuple2<String, String>, Tuple3<String, String, String>, String>() {
            /**
             * Called for each pair of left/right records passed to the join; returns all
             * five fields of the pair as one comma-separated line.
             */
            @Override
            public String join(Tuple2<String, String> left, Tuple3<String, String, String> right) throws Exception {
                // String.join renders null elements as "null", same as string concatenation.
                return String.join(",", left.f0, left.f1, right.f0, right.f1, right.f2);
            }
        });
joinedStream.print();
广播流
场景:事实表流数据 和 维度表流数据做关联,此时,一般把维度表流数据转换为广播流。
// Turn s2, the stream carrying the dictionary data, into a broadcast stream.
// The descriptor names the broadcast state: key = String, value = Tuple2<String, String>.
MapStateDescriptor<String, Tuple2<String, String>> userInfoStateDesc = new MapStateDescriptor<>("userInfoStateDesc", TypeInformation.of(String.class), TypeInformation.of(new TypeHint<Tuple2<String, String>>() {}));
BroadcastStream<Tuple3<String, String, String>> s2BroadcastStream = s2.broadcast(userInfoStateDesc);
// Whichever stream needs the broadcast state during processing must connect to the broadcast stream.
BroadcastConnectedStream<Tuple2<String, String>, Tuple3<String, String, String>> connected = s1.connect(s2BroadcastStream);
SingleOutputStreamOperator<String> resultStream = connected.process(new BroadcastProcessFunction<Tuple2<String, String>, Tuple3<String, String, String>, String>() {
    /**
     * Handles records of the main stream (called once per record): looks up the record's
     * f0 in the broadcast state and emits the record fields followed by the two looked-up
     * fields, or by nulls when nothing is found.
     *
     * @param element one record of the left (main) stream
     * @param ctx the read-only context
     * @param out the output collector
     * @throws Exception declared by the overridden method
     */
    @Override
    public void processElement(Tuple2<String, String> element, BroadcastProcessFunction<Tuple2<String, String>, Tuple3<String, String, String>, String>.ReadOnlyContext ctx, Collector<String> out) throws Exception {
        // The broadcast state obtained through ReadOnlyContext is a read-only view.
        // It must be requested with the same descriptor that was used in s2.broadcast(...).
        ReadOnlyBroadcastState<String, Tuple2<String, String>> broadcastState = ctx.getBroadcastState(userInfoStateDesc);
        if (broadcastState != null) {
            Tuple2<String, String> userInfo = broadcastState.get(element.f0);
            out.collect(element.f0 + "," + element.f1 + "," + (userInfo == null ? null : userInfo.f0) + "," + (userInfo == null ? null : userInfo.f1));
        } else {
            // NOTE(review): kept as a defensive branch; confirm whether getBroadcastState can return null.
            out.collect(element.f0 + "," + element.f1 + "," + null + "," + null);
        }
    }

    /**
     * Handles records of the broadcast stream: stores each record in the broadcast state,
     * keyed by f0 with (f1, f2) as the value.
     *
     * @param element one record of the broadcast stream
     * @param ctx the context, giving read-write access to the broadcast state
     * @param out the output collector (unused here)
     * @throws Exception declared by the overridden method
     */
    @Override
    public void processBroadcastElement(Tuple3<String, String, String> element, BroadcastProcessFunction<Tuple2<String, String>, Tuple3<String, String, String>, String>.Context ctx, Collector<String> out) throws Exception {
        // Obtain the (readable and writable) broadcast state from the context.
        BroadcastState<String, Tuple2<String, String>> broadcastState = ctx.getBroadcastState(userInfoStateDesc);
        // Split the broadcast record and put it into the broadcast state.
        broadcastState.put(element.f0, Tuple2.of(element.f1, element.f2));
    }
});
resultStream.print();