第1章 Flink State介绍
Flink有两种基本类型的状态:Managed State(托管状态)、Raw State(原生状态)。
Managed State是Flink Runtime托管,支持了常见的数据结构:ValueState、ListState、MapState等等;
Raw State则是由用户自己管理,只支持字节数组的数据结构。任何上层的数据结构需要序列化为字节数组。使用时,需要用户进行序列化。
以下完整代码请查阅github:https://github.com/zihaodeng/hellobigdata/tree/main/flink-online/flink-datastream-state
备注:生产环境中一定要考虑state的清理,本文涉及的程序只做演示。
第2章 Managed State
Managed State由Flink Runtime托管,可以分为两种类型:Keyed State和Operator State。
2.1 Keyed State
Keyed State只能作用于KeyedStream,作用于相同Key的State。
使用方面:重写Rich Function接口类,比如 RichMapFunction、 RichFlatMapFunction、RichAggregateFunction 。
2.1.1 ValueState
/**
 * Keyed {@code ValueState} demo: keeps a running sum of the integer field per key
 * and emits the updated (key, sum) pair for every incoming record.
 */
public class ValueStateExamples {
    public static void main(String[] args) throws Exception {
        //...
        // Simulated source: one record per second, either ("SZ", 1, ts) or ("BJ", 10, ts).
        DataStream<Tuple3<String, Integer, Long>> dataStream = env.addSource(new SourceFunction<Tuple3<String, Integer, Long>>() {
            /** Cleared by cancel() so that run() can leave its loop. */
            private volatile boolean running = true;

            @Override
            public void run(SourceContext<Tuple3<String, Integer, Long>> sourceContext) throws Exception {
                while (running) {
                    if (System.currentTimeMillis() % 2 == 0) {
                        sourceContext.collect(new Tuple3<String, Integer, Long>("SZ", 1, System.currentTimeMillis()));
                    } else {
                        sourceContext.collect(new Tuple3<String, Integer, Long>("BJ", 10, System.currentTimeMillis()));
                    }
                    Thread.sleep(1000);
                }
            }

            @Override
            public void cancel() {
                running = false;
            }
        });
        // Computation: key by the first tuple field and accumulate per key.
        // NOTE(review): positional keyBy(int) is deprecated in recent Flink releases; consider a KeySelector.
        dataStream.keyBy(0)
                .flatMap(new RichFlatMapFunction<Tuple3<String, Integer, Long>, Tuple2<String, Integer>>() {
                    /** Per-key running (key, sum) pair. */
                    private ValueState<Tuple2<String, Integer>> valueState;

                    @Override
                    public void open(Configuration parameters) throws Exception {
                        super.open(parameters);
                        ValueStateDescriptor<Tuple2<String, Integer>> valueStateDescriptor =
                                new ValueStateDescriptor<Tuple2<String, Integer>>(
                                        "ValueStateDesc"
                                        , TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {}));
                        // Time-to-live for the keyed state.
                        StateTtlConfig ttlConfig = StateTtlConfig
                                // 60-second time-to-live.
                                .newBuilder(Time.seconds(60))
                                // OnCreateAndWrite: refresh on creation and write (alternative: OnReadAndWrite).
                                .setUpdateType(StateTtlConfig.UpdateType.OnCreateAndWrite)
                                // NeverReturnExpired: expired values are never returned
                                // (alternative: ReturnExpiredIfNotCleanedUp).
                                .setStateVisibility(StateTtlConfig.StateVisibility.NeverReturnExpired)
                                .build();
                        valueStateDescriptor.enableTimeToLive(ttlConfig);
                        valueState = getRuntimeContext().getState(valueStateDescriptor);
                    }

                    @Override
                    public void flatMap(Tuple3<String, Integer, Long> tupleInput, Collector<Tuple2<String, Integer>> collector) throws Exception {
                        Tuple2<String, Integer> value = valueState.value();
                        if (value == null) {
                            // First record for this key (or the previous value expired).
                            value = Tuple2.of(tupleInput.f0, tupleInput.f1);
                        } else {
                            value.f0 = tupleInput.f0;
                            value.f1 += tupleInput.f1;
                        }
                        valueState.update(value);
                        collector.collect(value);
                        // Call valueState.clear() here once the accumulated value is no longer needed.
                    }
                })
                .print();
        env.execute("zmboosum");
    }
}
运行结果:

2.1.2 MapState
/**
 * Keyed {@code MapState} demo: stores a running sum under the record's key inside the
 * per-key map state and emits the updated (key, sum) pair for every incoming record.
 */
public class MapStateExamples {
    public static void main(String[] args) throws Exception {
        //...
        // Simulated source: one record per second, either ("SZ", 1, ts) or ("BJ", 10, ts).
        DataStream<Tuple3<String, Integer, Long>> dataStream = env.addSource(new SourceFunction<Tuple3<String, Integer, Long>>() {
            /** Cleared by cancel() so that run() can leave its loop. */
            private volatile boolean running = true;

            @Override
            public void run(SourceContext<Tuple3<String, Integer, Long>> sourceContext) throws Exception {
                while (running) {
                    if (System.currentTimeMillis() % 2 == 0) {
                        sourceContext.collect(new Tuple3<String, Integer, Long>("SZ", 1, System.currentTimeMillis()));
                    } else {
                        sourceContext.collect(new Tuple3<String, Integer, Long>("BJ", 10, System.currentTimeMillis()));
                    }
                    Thread.sleep(1000);
                }
            }

            @Override
            public void cancel() {
                running = false;
            }
        });
        // Computation: key by the first tuple field and accumulate per key.
        dataStream.keyBy(new KeySelector<Tuple3<String, Integer, Long>, String>() {
                    @Override
                    public String getKey(Tuple3<String, Integer, Long> tupleInput) throws Exception {
                        return tupleInput.f0;
                    }
                })
                .flatMap(new RichFlatMapFunction<Tuple3<String, Integer, Long>, Tuple2<String, Integer>>() {
                    /** Map from record key to its running sum. */
                    private MapState<String, Integer> mapState;

                    @Override
                    public void open(Configuration parameters) throws Exception {
                        super.open(parameters);
                        MapStateDescriptor<String, Integer> mapStateDescriptor =
                                new MapStateDescriptor<String, Integer>(
                                        "MapStateDesc"
                                        , String.class
                                        , Integer.class
                                );
                        mapState = getRuntimeContext().getMapState(mapStateDescriptor);
                    }

                    @Override
                    public void flatMap(Tuple3<String, Integer, Long> tupleInput, Collector<Tuple2<String, Integer>> collector) throws Exception {
                        String key = tupleInput.f0;
                        Integer valueSum = tupleInput.f1;
                        // Add the previously stored sum, if any, to the incoming value.
                        if (mapState.contains(key)) {
                            valueSum += mapState.get(key);
                        }
                        mapState.put(key, valueSum);
                        collector.collect(Tuple2.of(key, valueSum));
                        // Call mapState.clear() here once the accumulated value is no longer needed.
                    }
                })
                .print();
        env.execute("zmboosum");
    }
}
运行结果:

2.1.3 ListState
/**
 * Keyed {@code ListState} demo: appends a new running total to the per-key list for every
 * incoming record and emits that total.
 */
public class ListStateExamples {
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        //...
        // Simulated custom source: one record per second, either ("SZ", 1, ts) or ("BJ", 10, ts).
        DataStream<Tuple3<String, Integer, Long>> dataStream = env.addSource(new SourceFunction<Tuple3<String, Integer, Long>>() {
            /** Cleared by cancel() so that run() can leave its loop. */
            private volatile boolean running = true;

            @Override
            public void run(SourceContext<Tuple3<String, Integer, Long>> sourceContext) throws Exception {
                while (running) {
                    if (System.currentTimeMillis() % 2 == 0) {
                        sourceContext.collect(new Tuple3<String, Integer, Long>("SZ", 1, System.currentTimeMillis()));
                    } else {
                        sourceContext.collect(new Tuple3<String, Integer, Long>("BJ", 10, System.currentTimeMillis()));
                    }
                    Thread.sleep(1000);
                }
            }

            @Override
            public void cancel() {
                running = false;
            }
        });
        // Computation: key by the first tuple field and accumulate per key.
        dataStream.keyBy(new KeySelector<Tuple3<String, Integer, Long>, String>() {
                    @Override
                    public String getKey(Tuple3<String, Integer, Long> tupleInput) throws Exception {
                        return tupleInput.f0;
                    }
                })
                .flatMap(new RichFlatMapFunction<Tuple3<String, Integer, Long>, Tuple2<String, Integer>>() {
                    /** Per-key history of running totals; the last element is the current total. */
                    private ListState<Tuple2<String, Integer>> listState;

                    @Override
                    public void open(Configuration parameters) throws Exception {
                        super.open(parameters);
                        ListStateDescriptor<Tuple2<String, Integer>> listStateDescriptor =
                                new ListStateDescriptor<Tuple2<String, Integer>>(
                                        "ListStateDesc"
                                        , TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {})
                                );
                        listState = getRuntimeContext().getListState(listStateDescriptor);
                    }

                    @Override
                    public void flatMap(Tuple3<String, Integer, Long> tupleInput, Collector<Tuple2<String, Integer>> collector) throws Exception {
                        String key = tupleInput.f0;
                        Integer value = tupleInput.f1;
                        // Walk the list to find the most recently appended total (null when the list is empty).
                        Tuple2<String, Integer> latest = null;
                        for (Tuple2<String, Integer> stored : listState.get()) {
                            latest = stored;
                        }
                        // The new total is the latest stored total plus the incoming value.
                        Tuple2<String, Integer> total = (latest == null)
                                ? Tuple2.of(key, value)
                                : Tuple2.of(key, latest.f1 + value);
                        // The list grows by one element per record; this is only acceptable for a demo.
                        listState.add(total);
                        collector.collect(total);
                        // Call listState.clear() here once the history is no longer needed.
                    }
                })
                .print();
        env.execute("zmboosum");
    }
}
运行结果:

2.1.4 ReducingState
/**
 * Keyed {@code ReducingState} demo: folds every incoming integer into a per-key sum using a
 * {@code ReduceFunction} and emits the updated (key, sum) pair.
 */
public class ReducingStateExamples {
    public static void main(String[] args) throws Exception {
        //...
        // Simulated custom source: one record per second, either ("SZ", 1, ts) or ("BJ", 10, ts).
        DataStream<Tuple3<String, Integer, Long>> dataStream = env.addSource(new SourceFunction<Tuple3<String, Integer, Long>>() {
            /** Cleared by cancel() so that run() can leave its loop. */
            private volatile boolean running = true;

            @Override
            public void run(SourceContext<Tuple3<String, Integer, Long>> sourceContext) throws Exception {
                while (running) {
                    if (System.currentTimeMillis() % 2 == 0) {
                        sourceContext.collect(new Tuple3<String, Integer, Long>("SZ", 1, System.currentTimeMillis()));
                    } else {
                        sourceContext.collect(new Tuple3<String, Integer, Long>("BJ", 10, System.currentTimeMillis()));
                    }
                    Thread.sleep(1000);
                }
            }

            @Override
            public void cancel() {
                running = false;
            }
        });
        // Computation: key by the first tuple field and accumulate per key.
        dataStream.keyBy(new KeySelector<Tuple3<String, Integer, Long>, String>() {
                    @Override
                    public String getKey(Tuple3<String, Integer, Long> tupleInput) throws Exception {
                        return tupleInput.f0;
                    }
                })
                .flatMap(new RichFlatMapFunction<Tuple3<String, Integer, Long>, Tuple2<String, Integer>>() {
                    /** Per-key sum maintained by the reduce function below. */
                    private ReducingState<Integer> reducingState;

                    @Override
                    public void open(Configuration parameters) throws Exception {
                        super.open(parameters);
                        ReducingStateDescriptor<Integer> reducingStateDescriptor =
                                new ReducingStateDescriptor<Integer>(
                                        "ReducingStateDesc"
                                        , new ReduceFunction<Integer>() {
                                            @Override
                                            public Integer reduce(Integer integer, Integer t1) throws Exception {
                                                return integer + t1;
                                            }
                                        }
                                        , Integer.class
                                );
                        reducingState = getRuntimeContext().getReducingState(reducingStateDescriptor);
                    }

                    @Override
                    public void flatMap(Tuple3<String, Integer, Long> tupleInput, Collector<Tuple2<String, Integer>> collector) throws Exception {
                        // add() applies the ReduceFunction to the stored value and the new one.
                        reducingState.add(tupleInput.f1);
                        collector.collect(Tuple2.of(tupleInput.f0, reducingState.get()));
                        // Call reducingState.clear() here once the sum is no longer needed.
                    }
                })
                .print();
        env.execute("zmboosum");
    }
}
运行结果:

2.1.5 AggregatingState
/**
 * Keyed {@code AggregatingState} demo: aggregates (key, value) pairs into a per-key
 * (key, sum) accumulator with an {@code AggregateFunction} and emits the current result.
 */
public class AggregatingStateExamples {
    public static void main(String[] args) throws Exception {
        //...
        // Simulated custom source: one record per second, either ("SZ", 1, ts) or ("BJ", 10, ts).
        DataStream<Tuple3<String, Integer, Long>> dataStream = env.addSource(new SourceFunction<Tuple3<String, Integer, Long>>() {
            /** Cleared by cancel() so that run() can leave its loop. */
            private volatile boolean running = true;

            @Override
            public void run(SourceContext<Tuple3<String, Integer, Long>> sourceContext) throws Exception {
                while (running) {
                    if (System.currentTimeMillis() % 2 == 0) {
                        sourceContext.collect(new Tuple3<String, Integer, Long>("SZ", 1, System.currentTimeMillis()));
                    } else {
                        sourceContext.collect(new Tuple3<String, Integer, Long>("BJ", 10, System.currentTimeMillis()));
                    }
                    Thread.sleep(1000);
                }
            }

            @Override
            public void cancel() {
                running = false;
            }
        });
        // Computation: key by the first tuple field and accumulate per key.
        dataStream.keyBy(new KeySelector<Tuple3<String, Integer, Long>, String>() {
                    @Override
                    public String getKey(Tuple3<String, Integer, Long> tupleInput) throws Exception {
                        return tupleInput.f0;
                    }
                })
                .flatMap(new RichFlatMapFunction<Tuple3<String, Integer, Long>, Tuple2<String, Integer>>() {
                    /** Per-key aggregate; input and output are both (key, value) pairs. */
                    private AggregatingState<Tuple2<String, Integer>, Tuple2<String, Integer>> aggregatingState;

                    @Override
                    public void open(Configuration parameters) throws Exception {
                        super.open(parameters);
                        AggregatingStateDescriptor<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple2<String, Integer>> aggregatingStateDescriptor =
                                new AggregatingStateDescriptor<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple2<String, Integer>>(
                                        // A non-empty name identifies the state (previously the empty string).
                                        "AggregatingStateDesc"
                                        , new AggregateFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, Tuple2<String, Integer>>() {
                                            /** Starts with an empty key and a zero sum. */
                                            @Override
                                            public Tuple2<String, Integer> createAccumulator() {
                                                return Tuple2.of("", 0);
                                            }

                                            /** Adds the input to the accumulator when keys match; otherwise the input becomes the accumulator. */
                                            @Override
                                            public Tuple2<String, Integer> add(Tuple2<String, Integer> tupleInput, Tuple2<String, Integer> tupleAcc) {
                                                if (tupleAcc.f0.equals(tupleInput.f0)) {
                                                    return Tuple2.of(tupleInput.f0, tupleInput.f1 + tupleAcc.f1);
                                                } else {
                                                    return tupleInput;
                                                }
                                            }

                                            @Override
                                            public Tuple2<String, Integer> getResult(Tuple2<String, Integer> tupleAcc) {
                                                return tupleAcc;
                                            }

                                            /**
                                             * Combines two accumulators instead of returning null: an accumulator that
                                             * still carries the empty initial key contributes nothing, otherwise the sums
                                             * are added under the first accumulator's key.
                                             */
                                            @Override
                                            public Tuple2<String, Integer> merge(Tuple2<String, Integer> stringIntegerTuple2, Tuple2<String, Integer> acc1) {
                                                if (stringIntegerTuple2.f0.isEmpty()) {
                                                    return acc1;
                                                }
                                                if (acc1.f0.isEmpty()) {
                                                    return stringIntegerTuple2;
                                                }
                                                return Tuple2.of(stringIntegerTuple2.f0, stringIntegerTuple2.f1 + acc1.f1);
                                            }
                                        }
                                        , TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {})
                                );
                        aggregatingState = getRuntimeContext().getAggregatingState(aggregatingStateDescriptor);
                    }

                    @Override
                    public void flatMap(Tuple3<String, Integer, Long> tupleInput, Collector<Tuple2<String, Integer>> collector) throws Exception {
                        // add() runs AggregateFunction.add on the stored accumulator.
                        aggregatingState.add(Tuple2.of(tupleInput.f0, tupleInput.f1));
                        // get() runs AggregateFunction.getResult on the stored accumulator.
                        Tuple2<String, Integer> tupleOutput = aggregatingState.get();
                        collector.collect(tupleOutput);
                        // Call aggregatingState.clear() here once the aggregate is no longer needed.
                    }
                })
                .print();
        env.execute("zmboosum");
    }
}
运行结果:

2.2 Operator State
Operator State是一种non-keyed state,Operator State可以作用于所有算子。
使用方面:通过实现CheckpointedFunction或ListCheckpointed接口来定义操作Managed Operator State的函数。ListCheckpointed接口是CheckpointedFunction的受限变体,它仅支持列表式的状态,并且在恢复时采用均匀拆分(even-split)的再分配方案。
2.2.1 ListState
当并发度改变时,ListState会将每个并发实例上的List都取出,合并成一个新的List,再按元素个数均匀分配给新的Task。
public class ListStateExamples {
public static void main(String[] args) throws Exception {
//...
// source 模拟数据源 自定义
DataStream<Tuple3<String,Integer,Long>> dataStream = env.addSource(new Covid19Source());
// 计算
dataStream.map(new MapFunction<Tuple3<String, Integer, Long>, Tuple2<String,Integer>>() {
public Tuple2<String, Integer> map(Tuple3<String, Integer, Long> tupleInput) throws Exception {
return Tuple2.of(tupleInput.f0, tupleInput.f1);
}
}).addSink(new Covid19Sink(10));
env.execute("zmboosum");
}
/**
* 自定义source
*/
private static class Covid19Source implements SourceFunction<Tuple3<String, Integer, Long>>, CheckpointedFunction {
/** current offset for exactly once semantics */
private Long offset = 0L;
/** flag for job cancellation */
private volatile boolean isRunning = true;
/** Our state object. */
private ListState<Long> listState;
/**
* checkpoint
* @param functionSnapshotContext
* @throws Exception
*/
public void snapshotState(FunctionSnapshotContext functionSnapshotContext) throws Exception {
//保存最新的offset
listState.clear();
listState.add(offset);
}
public void initializeState(FunctionInitializationContext context) throws Exception {
ListStateDescriptor<Long> listStateDescriptor =
new ListStateDescriptor<Long>(
"ListStateDesc"
, Long.class
);
listState = context.getOperatorStateStore().getListState(listStateDescriptor);
//获取最新的offset
for (Long l : listState.get()) {
offset = l;
}
}
public void run(SourceContext<Tuple3<String, Integer, Long>> sourceContext) throws Exception {
Object checkpointLock = sourceContext.getCheckpointLock();
while (isRunning) {
// output and state update are atomic
synchronized (checkpointLock) {
if(System.currentTimeMillis() %2 ==0) {
sourceContext.collect(new Tuple3<String,Integer,Long>("SZ", 1, System.currentTimeMillis()));
}else{
sourceContext.collect(new Tuple3<String, Integer, Long>("BJ", 10, System.currentTimeMillis()));
}
offset += 1;
Thread.sleep(1000);
}
}
}
public void cancel() {
isRunning = false;
}
}
/**
* 自定义sink
*/
private static class Covid19Sink implements SinkFunction<Tuple2<String,Integer>>, CheckpointedFunction{
/** 多少条数据一次sink */
private final int threshold;
/** buffer缓冲state */
private List<Tuple2<String,Integer>> bufferedElements;
/** cp state */
private ListState<Tuple2<String, Integer>> checkpointedState;
public Covid19Sink(int threshold){
this.threshold = threshold;
this.bufferedElements = new ArrayList<Tuple2<String, Integer>>();
}
public void initializeState(FunctionInitializationContext context) throws Exception {
ListStateDescriptor<Tuple2<String, Integer>> listStateDescriptor =
new ListStateDescriptor<Tuple2<String, Integer>>(
"ListStateDesc"
, TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {})
);
checkpointedState = context.getOperatorStateStore().getListState(listStateDescriptor);
//故障恢复
if (context.isRestored()) {
for (Tuple2<String, Integer> element : checkpointedState.get()) {
bufferedElements.add(element);
}
}
}
/**
* checkpoint
* @param functionSnapshotContext
* @throws Exception
*/
public void snapshotState(FunctionSnapshotContext functionSnapshotContext) throws Exception {
checkpointedState.clear();
for (Tuple2<String, Integer> element : bufferedElements) {
checkpointedState.add(element);
}
}
public void invoke(Tuple2<String, Integer> value, Context context) throws Exception {
bufferedElements.add(value);
if (bufferedElements.size() == threshold) {
//每满threshold数量send一次
for (Tuple2<String, Integer> element: bufferedElements) {
// send it to the sink
System.out.println(element);
}
bufferedElements.clear();
}
}
}
}
运行结果:

2.2.2 UnionListState
UnionListState相比于ListState更加灵活,把划分的方式交给用户去做,当改变并发的时候,会将原来的List拼接起来。然后不做划分,直接交给用户。
union list state 目前被广泛使用在 kafka connector 中,不过用户在日常开发中可能较少遇到。它的语义是:从检查点恢复之后,每个并发 task 拿到的是原先所有 operator 上的 state,如下图所示:
kafka connector 使用该功能,为的是从检查点恢复时,可以拿到之前的全局信息,如果用户需要使用该功能,需要切记恢复的 task 只取其中的一部分进行处理和用于下一次 snapshot,否则有可能随着作业不断的重启而导致 state 规模不断增长。
--摘自Ververica官方博客
/**
 * Operator union list state demo: on restore every subtask receives the state of all subtasks
 * and must pick the entries it is responsible for.
 */
public class UnionListStateExamples {
    public static void main(String[] args) throws Exception {
        //...
        // Simulated custom source.
        DataStream<Tuple3<String, Integer, Long>> dataStream = env.addSource(new Covid19Source());
        // Computation: drop the timestamp and hand (key, value) pairs to the buffering sink.
        dataStream.map(new MapFunction<Tuple3<String, Integer, Long>, Tuple2<String, Integer>>() {
            @Override
            public Tuple2<String, Integer> map(Tuple3<String, Integer, Long> tupleInput) throws Exception {
                return Tuple2.of(tupleInput.f0, tupleInput.f1);
            }
        }).addSink(new Covid19Sink(10)).setParallelism(2);
        env.execute("zmboosum");
    }

    /**
     * Custom source that stores (subtask index, offset) in union list state.
     */
    private static class Covid19Source extends RichSourceFunction<Tuple3<String, Integer, Long>> implements CheckpointedFunction {
        /** Index of this subtask, read in open(). */
        private int subtaskIndex = 0;
        /** Number of records emitted so far; saved on every checkpoint. */
        private long offset = 0L;
        /** Union list state holding one (subtask index, offset) entry per subtask. */
        private ListState<Tuple2<Integer, Long>> unionListState;
        /** Flag for job cancellation. */
        private volatile boolean isRunning = true;

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
        }

        /** Replaces the checkpointed state with this subtask's current offset. */
        @Override
        public void snapshotState(FunctionSnapshotContext context) throws Exception {
            unionListState.clear();
            unionListState.add(Tuple2.of(subtaskIndex, offset));
        }

        /** Registers the union list state and, after a restore, picks this subtask's offset from it. */
        @Override
        public void initializeState(FunctionInitializationContext context) throws Exception {
            int currentSubtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
            ListStateDescriptor<Tuple2<Integer, Long>> listStateDescriptor =
                    new ListStateDescriptor<Tuple2<Integer, Long>>(
                            "UnionListStateDesc"
                            , TypeInformation.of(new TypeHint<Tuple2<Integer, Long>>() {})
                    );
            unionListState = context.getOperatorStateStore().getUnionListState(listStateDescriptor);
            if (context.isRestored()) {
                // With union list state the user filters the restored entries according to
                // business needs; matching on the subtask index is for illustration only.
                for (Tuple2<Integer, Long> element : unionListState.get()) {
                    if (element.f0 == currentSubtaskIndex) {
                        offset = element.f1;
                    }
                }
            }
        }

        @Override
        public void run(SourceContext<Tuple3<String, Integer, Long>> sourceContext) throws Exception {
            final Object checkpointLock = sourceContext.getCheckpointLock();
            while (isRunning) {
                // Emitting a record and advancing the offset must be atomic with respect to checkpoints.
                synchronized (checkpointLock) {
                    if (System.currentTimeMillis() % 2 == 0) {
                        sourceContext.collect(new Tuple3<String, Integer, Long>("SZ", 1, System.currentTimeMillis()));
                    } else {
                        sourceContext.collect(new Tuple3<String, Integer, Long>("BJ", 10, System.currentTimeMillis()));
                    }
                    offset += 1;
                }
                // Sleep outside the lock so that a checkpoint can run while the source is idle.
                Thread.sleep(1000);
            }
        }

        @Override
        public void cancel() {
            isRunning = false;
        }
    }

    /**
     * Custom sink that buffers records tagged with the subtask index and prints them in
     * batches of {@code threshold}.
     */
    private static class Covid19Sink extends RichSinkFunction<Tuple2<String, Integer>> implements CheckpointedFunction {
        /** Number of buffered records that triggers a flush. */
        private final int threshold;
        /** Index of this subtask, read in open(). */
        private int subtaskIndex = 0;
        /** In-memory buffer of (subtask index, key, value) records that have not been flushed yet. */
        private List<Tuple3<Integer, String, Integer>> bufferedElements;
        /** Checkpointed copy of the buffer. */
        private ListState<Tuple3<Integer, String, Integer>> checkpointedState;

        public Covid19Sink(int threshold) {
            this.threshold = threshold;
            this.bufferedElements = new ArrayList<Tuple3<Integer, String, Integer>>();
        }

        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            subtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
        }

        /** Replaces the checkpointed state with the current buffer contents. */
        @Override
        public void snapshotState(FunctionSnapshotContext functionSnapshotContext) throws Exception {
            checkpointedState.clear();
            for (Tuple3<Integer, String, Integer> element : bufferedElements) {
                checkpointedState.add(element);
            }
        }

        /** Registers the union list state and, after a restore, reloads this subtask's records. */
        @Override
        public void initializeState(FunctionInitializationContext context) throws Exception {
            int currentSubtaskIndex = getRuntimeContext().getIndexOfThisSubtask();
            ListStateDescriptor<Tuple3<Integer, String, Integer>> listStateDescriptor =
                    new ListStateDescriptor<Tuple3<Integer, String, Integer>>(
                            "ListStateDesc"
                            , TypeInformation.of(new TypeHint<Tuple3<Integer, String, Integer>>() {})
                    );
            checkpointedState = context.getOperatorStateStore().getUnionListState(listStateDescriptor);
            // Failure recovery.
            if (context.isRestored()) {
                // After a restore the union list state contains the entries of all subtasks;
                // keep only the ones tagged with this subtask's index.
                for (Tuple3<Integer, String, Integer> element : checkpointedState.get()) {
                    if (element.f0 == currentSubtaskIndex) {
                        bufferedElements.add(element);
                    }
                }
            }
        }

        @Override
        public void invoke(Tuple2<String, Integer> value, Context context) throws Exception {
            bufferedElements.add(Tuple3.of(subtaskIndex, value.f0, value.f1));
            // ">=" rather than "==": if the buffer ever holds more than threshold elements
            // (for example after a restore), an equality check would never flush again.
            if (bufferedElements.size() >= threshold) {
                for (Tuple3<Integer, String, Integer> element : bufferedElements) {
                    // Stand-in for sending the record to an external system.
                    System.out.println(element);
                }
                bufferedElements.clear();
            }
        }
    }
}
运行结果:

2.2.3 Broadcast State
BroadcastState在并发改变的时候,会将这些数据复制到新的Task。
在开发过程中,如果遇到需要下发/广播配置、规则等低吞吐事件流到下游所有 task 时,就可以使用 Broadcast State 特性。下游的 task 接收这些配置、规则并保存为 BroadcastState, 将这些配置应用到另一个数据流的计算中(connect) 。
public class BroadcastStateExamples {
// MapStateDescriptor 表述的 Broadcast State
private static final MapStateDescriptor<String, Integer> descriptor = new MapStateDescriptor(
"BroadcastStateConfig"
,String.class
,Integer.class
);
public static void main(String[] args) throws Exception {
//...
// source 模拟数据源
DataStream<Tuple3<String,Integer,Long>> dataStream = env.addSource(new SourceFunction<Tuple3<String, Integer, Long>>() {
public void run(SourceContext<Tuple3<String, Integer, Long>> sourceContext) throws Exception {
while (true) {
if(System.currentTimeMillis() %2 ==0) {
sourceContext.collect(new Tuple3<String,Integer,Long>("SZ", 1, System.currentTimeMillis()));
}else{
sourceContext.collect(new Tuple3<String, Integer, Long>("BJ", 10, System.currentTimeMillis()));
}
Thread.sleep(1000);
}
}
public void cancel() {}
});
// 自定义数据源(模拟) 进行一个简单的map转换 再转换成广播流
BroadcastStream<Tuple2<String,Integer>> broadcastStream = env.addSource(new SourceFunction<Tuple3<String, Integer, Long>>() {
public void run(SourceContext<Tuple3<String, Integer, Long>> sourceContext) throws Exception {
while (true) {
if(System.currentTimeMillis() %2 ==0) {
sourceContext.collect(new Tuple3<String,Integer,Long>("SZ", 0, System.currentTimeMillis()));
}else{
sourceContext.collect(new Tuple3<String, Integer, Long>("BJ", 1, System.currentTimeMillis()));
}
Thread.sleep(1000);
}
}
public void cancel() {}
}).map(new MapFunction<Tuple3<String, Integer, Long>, Tuple2<String,Integer>>() {
public Tuple2<String, Integer> map(Tuple3<String, Integer, Long> tupleInput) throws Exception {
return Tuple2.of(tupleInput.f0, tupleInput.f1);
}
}).broadcast(descriptor);
dataStream.keyBy(new KeySelector<Tuple3<String, Integer, Long>,String>() {
public String getKey(Tuple3<String, Integer, Long> tupleInput) throws Exception {
return tupleInput.f0;
}
})
.connect(broadcastStream)
.process(new Covid19Broadcast())
.print();
env.execute("zmboosum");
}
/**
* KeyedBroadcastProcessFunction参数:
* 1,KeyStream中Key的类型
* 2,业务流KeyedStream数据类型
* 3,广播流BroadcastStream数据类型
* 4,输出的数据类型
*/
private static class Covid19Broadcast
extends KeyedBroadcastProcessFunction<String, Tuple3<String, Integer, Long>, Tuple2<String, Integer>, Tuple3<Integer, Integer, Long>> {
/**
* 在每条正常的业务流数据(KeyedStream)进入的时候进行调用
* @param tupleInput
* @param readOnlyContext 只读的Broadcast State
* @param collector
* @throws Exception
*/
@Override
public void processElement(Tuple3<String, Integer, Long> tupleInput, ReadOnlyContext readOnlyContext, Collector<Tuple3<Integer, Integer, Long>> collector) throws Exception {
ReadOnlyBroadcastState<String,Integer> broadcastState = readOnlyContext.getBroadcastState(descriptor);
collector.collect(new Tuple3<Integer, Integer, Long>(broadcastState.get(tupleInput.f0), tupleInput.f1, tupleInput.f2));
}
/**
* 在每条广播流(BroadcastStream)进入的时候进行调用
* 可用来更新Boardcast State
* @param tupleInput
* @param context
* @param collector
* @throws Exception
*/
@Override
public void processBroadcastElement(Tuple2<String, Integer> tupleInput, Context context, Collector<Tuple3<Integer, Integer, Long>> collector) throws Exception {
if(tupleInput.f0.equals("BJ")) {
context.getBroadcastState(descriptor).put(tupleInput.f0, 8);
}else{
context.getBroadcastState(descriptor).put(tupleInput.f0, tupleInput.f1);
}
}
@Override
public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple3<Integer, Integer, Long>> out) throws Exception {
super.onTimer(timestamp, ctx, out);
//可以用来清除State,避免State一直增长
}
}
}
运行结果:

第3章 Raw State
原生状态,用户自定义算子状态。暂不介绍。
第4章 参考文章
https://ci.apache.org/projects/flink/flink-docs-release-1.12/dev/stream/state/state.html
https://ververica.cn/developers/flink-state-best-practices/
今天初七,这两天大部分小伙伴已经陆续开工了,笔者明天也要开工了,在这里祝大家开工大吉,好运常伴!

3712

被折叠的 条评论
为什么被折叠?



