/**
 * Demonstrates Flink broadcast state by enriching a fact stream of orders with category names
 * taken from a dimension stream that can change while the job is running.
 *
 * <p>When broadcast state is a good fit (notes from the original author):
 * <ol>
 *   <li>The amount of dimension data is small. Broadcast state is kept in memory, so it must not
 *       be used for very large data sets.
 *   <li>The dimension data needs to be modifiable dynamically.
 * </ol>
 *
 * <p>A dimension record has to be received before a fact record can be joined with it. Orders
 * whose category is not (or no longer) known are emitted with {@link #UNKNOWN_CATEGORY} as the
 * category name.
 *
 * <p>If the data to broadcast is very large, or cannot be obtained up front at all, asynchronous
 * I/O can be used instead to join the dimension data efficiently.
 */
public class BroadcastStateDemo {

    /** Host that serves both socket text sources. */
    private static final String SOURCE_HOST = "linux01";

    /** Port of the dimension (category) socket source. */
    private static final int DIMENSION_PORT = 7777;

    /** Port of the fact (order) socket source. */
    private static final int FACT_PORT = 8888;

    /** Number of comma-separated fields every input line must provide. */
    private static final int FIELD_COUNT = 3;

    /** Operation marker on a dimension record that removes the category from the state. */
    private static final String DELETE_OPERATION = "DELETE";

    /**
     * Category name emitted when no dimension record is available for an order. A placeholder is
     * used instead of {@code null} because Flink tuples do not support null fields when they are
     * serialized.
     */
    private static final String UNKNOWN_CATEGORY = "unknown";

    /** Descriptor of the broadcast map state: category id mapped to category name. */
    private static final MapStateDescriptor<String, String> CATEGORY_STATE =
            new MapStateDescriptor<>("broadcast-state", Types.STRING, Types.STRING);

    /**
     * Builds and runs the job: reads both socket streams, broadcasts the dimension stream and
     * prints every order enriched with its category name.
     *
     * @param args command-line arguments; not used
     * @throws Exception if the job cannot be built or fails while executing
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Dimension stream, one "categoryId,categoryName,operation" record per line, e.g.
        //   c001,books,INSERT
        //   c002,computers,INSERT
        //   c002,stationery,UPDATE
        //   c002,stationery,DELETE
        DataStreamSource<String> dimensionLines =
                env.socketTextStream(SOURCE_HOST, DIMENSION_PORT);

        // Fact stream, one "orderId,amount,categoryId" record per line, e.g.
        //   o001,1000,c001
        //   o002,2000,c002
        DataStreamSource<String> factLines = env.socketTextStream(SOURCE_HOST, FACT_PORT);

        // Expected result for the first order: o001,1000,c001,books

        // Parse the dimension stream. Malformed lines are dropped up front so that a single bad
        // line cannot fail the whole job.
        SingleOutputStreamOperator<Tuple3<String, String, String>> dimensions = dimensionLines
                .filter(BroadcastStateDemo::isWellFormedDimension)
                .map(new MapFunction<String, Tuple3<String, String, String>>() {
                    @Override
                    public Tuple3<String, String, String> map(String input) throws Exception {
                        String[] fields = input.split(",");
                        return Tuple3.of(fields[0], fields[1], fields[2]);
                    }
                });

        // Broadcast the dimension records downstream, backed by the map state descriptor.
        BroadcastStream<Tuple3<String, String, String>> broadcastStream =
                dimensions.broadcast(CATEGORY_STATE);

        // Parse the fact stream, again dropping lines that cannot be parsed.
        SingleOutputStreamOperator<Tuple3<String, Double, String>> orders = factLines
                .filter(BroadcastStateDemo::isWellFormedFact)
                .map(new MapFunction<String, Tuple3<String, Double, String>>() {
                    @Override
                    public Tuple3<String, Double, String> map(String input) throws Exception {
                        String[] fields = input.split(",");
                        return Tuple3.of(fields[0], Double.parseDouble(fields[1]), fields[2]);
                    }
                });

        // Connecting the two streams lets the fact side read the broadcast state.
        SingleOutputStreamOperator<Tuple4<String, Double, String, String>> result =
                orders.connect(broadcastStream).process(new CategoryJoinFunction());

        result.print();
        env.execute();
    }

    /**
     * Checks that a dimension line has at least {@link #FIELD_COUNT} comma-separated fields.
     * Rejected lines are reported on standard error.
     *
     * @param line raw line read from the dimension socket
     * @return {@code true} if the line can be turned into a dimension record
     */
    private static boolean isWellFormedDimension(String line) {
        if (line.split(",").length >= FIELD_COUNT) {
            return true;
        }
        System.err.println("Skipping malformed dimension record: " + line);
        return false;
    }

    /**
     * Checks that a fact line has at least {@link #FIELD_COUNT} comma-separated fields and that
     * its second field is a parsable amount. Rejected lines are reported on standard error.
     *
     * @param line raw line read from the fact socket
     * @return {@code true} if the line can be turned into a fact record
     */
    private static boolean isWellFormedFact(String line) {
        String[] fields = line.split(",");
        if (fields.length >= FIELD_COUNT) {
            try {
                Double.parseDouble(fields[1]);
                return true;
            } catch (NumberFormatException ignored) {
                // Not a number: fall through and report the line as malformed.
            }
        }
        System.err.println("Skipping malformed fact record: " + line);
        return false;
    }

    /**
     * Joins orders with the broadcast category names and keeps the broadcast state in sync with
     * the dimension stream.
     */
    private static class CategoryJoinFunction extends BroadcastProcessFunction<
            Tuple3<String, Double, String>,
            Tuple3<String, String, String>,
            Tuple4<String, Double, String, String>> {

        private static final long serialVersionUID = 1L;

        /**
         * Handles a fact record: looks up the category name for the order's category id and emits
         * the enriched order.
         */
        @Override
        public void processElement(
                Tuple3<String, Double, String> input,
                ReadOnlyContext ctx,
                Collector<Tuple4<String, Double, String, String>> out) throws Exception {
            ReadOnlyBroadcastState<String, String> categories =
                    ctx.getBroadcastState(CATEGORY_STATE);
            String categoryName = categories.get(input.f2);
            if (categoryName == null) {
                // The category has not been broadcast yet or was deleted; never emit a null field.
                categoryName = UNKNOWN_CATEGORY;
            }
            out.collect(Tuple4.of(input.f0, input.f1, input.f2, categoryName));
        }

        /**
         * Handles a broadcast (dimension) record: removes the category on a delete operation and
         * inserts or overwrites it for every other operation.
         */
        @Override
        public void processBroadcastElement(
                Tuple3<String, String, String> input,
                Context ctx,
                Collector<Tuple4<String, Double, String, String>> out) throws Exception {
            BroadcastState<String, String> categories = ctx.getBroadcastState(CATEGORY_STATE);
            String categoryId = input.f0;
            String categoryName = input.f1;
            String operation = input.f2;
            if (DELETE_OPERATION.equals(operation)) {
                // Drop the entry for this category from the state.
                categories.remove(categoryId);
            } else {
                // Insert or update the entry for this category.
                categories.put(categoryId, categoryName);
            }
        }
    }
}
// Flink -- broadcast state
// (blog page footer) Latest recommended article published on 2024-05-06 09:51:34