State 操作
关于State的概念在理论篇已经讲过,本篇文章着重讲的是State的实战
主要围绕这两部分
- 各类State的使用方法
- State数据的清理
Keyed State
前提条件:Keyed State只能用于KeyedStream(即经过keyBy之后的流)
Keyed State种类
- ValueState
- ListState
- ReducingState
- AggregatingState
- MapState
通性:
- 必须先创建StateDescriptor,才能得到对应的状态句柄
- 所有类型的状态都有clear()方法,用于清除当前key的状态数据
- 状态通过RuntimeContext进行访问,因此只能在rich function中使用。这一点十分重要:所有与状态相关的操作都必须在RichFunction中实现
ValueState
示例代码:
过程:首先是注册状态,然后声明状态,通过RuntimeContext访问状态,value函数获取状态,获取状态后进行自定义的逻辑操作,最后记得清除状态 clear
public class CountWindow extends RichFlatMapFunction<Tuple2<Long,Long>,Tuple2<Long,Double>> { //声明状态 private ValueState<Tuple2<Long,Long>> count; @Override public void open(Configuration parameters) throws Exception { //注册状态 ValueStateDescriptor<Tuple2<Long, Long>> ave = new ValueStateDescriptor<>("ave", TypeInformation.of(new TypeHint<Tuple2<Long, Long>>() { })); //访问state count = getRuntimeContext().getState(ave); } @Override public void flatMap(Tuple2<Long, Long> value, Collector<Tuple2<Long, Double>> out) throws Exception { //获取state值 Tuple2<Long, Long> current = count.value(); //state操作 if(current ==null){ //初始化 current = Tuple2.apply(0L,0L); } current.f0 +=1; current.f1 +=1; count.update(current); if (current.f0>=3){ avg = current.f1/current.f0 out.collect(new Tuple2<>(current.f0,current.f1)); count.clear(); } } }
ListState
过程类似,区别在于由于是List,所以访问state用的是getListState方法,更新使用的是add方法
public class CountListWindow extends RichFlatMapFunction<Tuple2<Long,Long>, Tuple2<Long,Double>> { //声明状态 private ListState<Tuple2<Long, Long>> counts; @Override public void open(Configuration parameters) throws Exception { //注册状态 ListStateDescriptor<Tuple2<Long, Long>> ave = new ListStateDescriptor<>("ave", TypeInformation.of(new TypeHint<Tuple2<Long, Long>>() { })); //访问state counts = getRuntimeContext().getListState(ave); } @Override public void flatMap(Tuple2<Long, Long> value, Collector<Tuple2<Long, Double>> out) throws Exception { //获取state值 Iterable<Tuple2<Long, Long>> current = counts.get(); //state操作 if (current == null) { //初始化 counts.addAll(Collections.emptyList()); } //更新 counts.add(value); //判断 ArrayList<Tuple2<Long, Long>> list = Lists.newArrayList(counts.get()); if (list.size() >= 3) { long count = 0; long sum = 0; for (Tuple2<Long, Long> ele : list) { count++; sum += ele._1; } double avg = (double) sum / count; out.collect(Tuple2.apply(value._1, avg)); counts.clear(); } } }
MapState
public class CountMap extends RichMapFunction<Tuple2<Long,Long>, Tuple2<Long,Double>> { //声明MapState private MapState<String,Long> mapState; @Override public void open(Configuration parameters) throws Exception { //句柄 MapStateDescriptor<String,Long> desc =new MapStateDescriptor<String, Long>("ave",String.class,Long.class) //访问state MapState<String, Long> mapState = getRuntimeContext().getMapState(desc); } @Override public Tuple2<Long, Double> map(Tuple2<Long, Long> value) throws Exception { mapState.put(UUID.randomUUID().toString(),value._1); Iterable<Long> values = mapState.values(); int size = Iterators.size((Iterator<?>) values); if (size>3){ long count = 0; long sum = 0; } mapState.clear(); return null; } }
ReducingState
通过get()获取当前聚合后的值,通过add()添加新元素(新元素会由ReduceFunction与已有的值聚合)
public class CountReduce extends RichFlatMapFunction<Tuple2<Long,Long>,Tuple2<Long,Long>> { private ReducingState<Tuple2<Long,Long>> reducerState; @Override public void open(Configuration parameters) throws Exception { //注册state ReducingStateDescriptor desc = new ReducingStateDescriptor("reduce", new ReduceFunction() { @Override public Object reduce(Object value1, Object value2) throws Exception { return Long.parseLong(value1.toString())+ Long.parseLong(value2.toString()); } },Long.class); // ReducingState reducingState = getRuntimeContext().getReducingState(desc); } @Override public void flatMap(Tuple2<Long, Long> value, Collector<Tuple2<Long, Long>> out) throws Exception { reducerState.add(value); out.collect(value); } }
AggregatingState
public class CountAgg extends RichMapFunction<Tuple2<Long,Long>,Tuple2<Long,Long>> { //声明状态 private AggregatingState<Tuple2<Long,Long>,Tuple2<Long,Long>> aggregatingState; @Override public void open(Configuration parameters) throws Exception { //注册状态 AggregatingStateDescriptor descriptor = new AggregatingStateDescriptor("agg", new AggregateFunction() { @Override public Object createAccumulator() { return null; } @Override public Object add(Object value, Object accumulator) { return null; } @Override public Object getResult(Object accumulator) { return null; } @Override public Object merge(Object a, Object b) { return a.toString() + b.toString(); } },Long.class); AggregatingState aggregatingState = getRuntimeContext().getAggregatingState(descriptor); } @Override public Tuple2<Long, Long> map(Tuple2<Long, Long> value) throws Exception { aggregatingState.add(value); System.out.println(value.f0); return null; } }
Operator State
ListState
示例
/**
 * Sink that buffers elements in memory and flushes them in batches of {@code threshold}.
 *
 * <p>The buffer is written to operator {@link ListState} on every checkpoint and read back in
 * {@link #initializeState} when the function is restored, so buffered but unsent elements are
 * not dropped on recovery.
 */
public class BufferingSink
        implements SinkFunction<Tuple2<String, Integer>>,
                   CheckpointedFunction {

    /** Number of buffered elements that triggers a flush. */
    private final int threshold;

    /** Operator state handle used to checkpoint the buffer. */
    private transient ListState<Tuple2<String, Integer>> checkpointedState;

    /** Elements received since the last flush. */
    private final List<Tuple2<String, Integer>> bufferedElements;

    /**
     * @param threshold number of buffered elements at which the buffer is flushed
     */
    public BufferingSink(int threshold) {
        this.threshold = threshold;
        this.bufferedElements = new ArrayList<>();
    }

    /**
     * Buffers {@code value} and flushes the whole buffer once it holds at least
     * {@code threshold} elements.
     */
    @Override
    public void invoke(Tuple2<String, Integer> value, Context context) throws Exception {
        bufferedElements.add(value);
        // ">=" rather than "==": elements re-added in initializeState can push the buffer past
        // the threshold, and an equality check would then never fire again.
        if (bufferedElements.size() >= threshold) {
            for (Tuple2<String, Integer> element : bufferedElements) {
                // send it to the sink
            }
            bufferedElements.clear();
        }
    }

    /** Replaces the checkpointed state with the current contents of the buffer. */
    @Override
    public void snapshotState(FunctionSnapshotContext context) throws Exception {
        checkpointedState.update(bufferedElements);
    }

    /**
     * Obtains the operator list state and, when restoring, refills the in-memory buffer from it.
     */
    @Override
    public void initializeState(FunctionInitializationContext context) throws Exception {
        ListStateDescriptor<Tuple2<String, Integer>> descriptor =
                new ListStateDescriptor<>(
                        "buffered-elements",
                        TypeInformation.of(new TypeHint<Tuple2<String, Integer>>() {}));

        checkpointedState = context.getOperatorStateStore().getListState(descriptor);

        if (context.isRestored()) {
            for (Tuple2<String, Integer> element : checkpointedState.get()) {
                bufferedElements.add(element);
            }
        }
    }
}
参考:Flink官方文档