Flink小知识--State Processor API的简单讲解(1) State的读取

什么是State Processor API

官方地址:
https://ci.apache.org/projects/flink/flink-docs-release-1.13/docs/libs/state_processor_api/

使用 State Processor API 可以读取、写入和修改 savepoints 和 checkpoints,也可以转为 SQL 查询来分析和处理状态数据。

  • 引入jar
<dependency>
    <groupId>org.apache.flink</groupId>
    <artifactId>flink-state-processor-api_2.11</artifactId>
    <version>1.13.0</version>
</dependency>

Reading State

1. Operator State 的读取
//读取 Operator State 时,只需指定算子的 uid、状态名称和类型信息。

//例如: FlinkKafkaConsumerBase类中保存 分区 偏移量 
//	private static final String OFFSETS_STATE_NAME = "topic-partition-offset-states";
//	private transient ListState<Tuple2<KafkaTopicPartition, Long>> unionOffsetStates;

DataSet<Integer> listState  = savepoint.readListState<>(
    "my-uid",
    "list-state",
    Types.INT);
DataSet<Integer> listState  = savepoint.readUnionState<>(
    "my-uid",
    "union-state",
    Types.INT);
DataSet<Tuple2<Integer, Integer>> broadcastState = savepoint.readBroadcastState<>(
    "my-uid",
    "broadcast-state",
    Types.INT,
    Types.INT);
   
//在状态描述符(StateDescriptor)中使用了自定义类型序列化器 TypeSerializer
DataSet<Integer> listState = savepoint.readListState<>(
    "uid",
    "list-state", 
    Types.INT,
    new MyCustomIntSerializer());
2. Keyed State的读取

注: java scala 代码产生的state 不要混着读取, 可能造成key序列化错误

读取keyed state时,使用 readKeyedState 指定uid和KeyedStateReaderFunction<KeyType, OutputType> 函数来获取对应的 state

KeyedStateReaderFunction 允许用户读取任意列和复杂的状态类型,如 ListState、MapState 和 AggregatingState。这意味着,如果算子中包含一个有状态的处理函数(process function),例如:

  • 作业自定义状态checkpoint
 /**
  * Keyed process function that maintains two states per key: a value state named
  * "state" and a list state named "times". It emits nothing downstream.
  */
 public static class StatefulFunctionWithTime extends KeyedProcessFunction<String, Tuple2<String,Integer>, Void> {

        ValueState<Integer> state;

        ListState<Long> updateTimes;

        /** Registers both state handles with the runtime context. */
        @Override
        public void open(Configuration parameters) {
            state = getRuntimeContext().getState(
                    new ValueStateDescriptor<Integer>("state", Types.INT));
            updateTimes = getRuntimeContext().getListState(
                    new ListStateDescriptor<Long>("times", Types.LONG));
        }

        /**
         * Stores the element's second field plus one in the value state and appends the
         * current wall-clock time (milliseconds) to the list state.
         */
        @Override
        public void processElement(Tuple2<String,Integer> value, Context ctx, Collector<Void> out) throws Exception {
            int incremented = value.f1 + 1;
            state.update(incremented);

            long now = System.currentTimeMillis();
            updateTimes.add(now);
        }
    }
  • 通过定义输出类型和相应的 KeyedStateReaderFunction 来读取

   /**
    * Loads a checkpoint from the local file system, reads the keyed state of the
    * operator with uid "window_uid" and prints every record.
    */
   public static void main(String[] args) throws Exception {
        // Location of the checkpoint to inspect.
        final String checkpointPath =
                "file:///D:\\IDEAspaces\\bigdata_study\\bigdata_flink\\data\\checkpoint\\3f019c9823b8dcb398b231576c583316\\chk-1";

        ExecutionEnvironment batchEnv = ExecutionEnvironment.getExecutionEnvironment();
        ExistingSavepoint loaded = Savepoint.load(batchEnv, checkpointPath, new MemoryStateBackend());

        // Read the keyed state and print it.
        DataSet<KeyedStates> perKeyState = loaded.readKeyedState("window_uid", new ReaderFunction());
        perKeyState.print();
    }
    /**
     * Wrapper type used as the element type of the output DataSet: one record per key,
     * holding the key, a numeric value and a list of timestamps.
     */
    public static class KeyedStates {
        public String key;
        public long value;
        public List<Long> times;

        /** Renders the record as {@code KeyedStates{key='..', value=.., times=[..]}}. */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("KeyedStates{");
            text.append("key='").append(key).append('\'');
            text.append(", value=").append(value);
            text.append(", times=").append(times);
            text.append('}');
            return text.toString();
        }
    }
        // 作业的状态如何设置 相对应的读取的时候如何设置 一一对应
    public static class ReaderFunction extends KeyedStateReaderFunction<String, KeyedStates> {

        ValueState<Integer> state;

        ListState<Long> updateTimes;

        @Override
        public void open(Configuration parameters) {
            ValueStateDescriptor<Integer> stateDescriptor = new ValueStateDescriptor<>("state", Types.INT);
            state = getRuntimeContext().getState(stateDescriptor);

            ListStateDescriptor<Long> updateDescriptor = new ListStateDescriptor<>("times", Types.LONG);
            updateTimes = getRuntimeContext().getListState(updateDescriptor);
        }

        @Override
        public void readKey(
                String key,
                Context ctx,
                Collector<KeyedStates> out) throws Exception {

            KeyedStates data = new KeyedStates();
            data.key    = key;
            data.value  = state.value();
            data.times  = StreamSupport
                    .stream(updateTimes.get().spliterator(), false)
                    .collect(Collectors.toList());

            out.collect(data);
        }
    }
  • checkpoint state 的数据
KeyedStates{key='a', value=2, times=[1625486225091, 1625486238091, 1625486239091, 1625486239991]}
KeyedStates{key='d', value=2, times=[1625486233790, 1625486238591, 1625486239491, 1625486240391]}
KeyedStates{key='c', value=2, times=[1625486229989, 1625486238191, 1625486239191, 1625486240091]}
KeyedStates{key='2', value=2, times=[1625486245292]}
KeyedStates{key='b', value=2, times=[1625486227788, 1625486238191, 1625486239191, 1625486240091]}
KeyedStates{key='e', value=2, times=[1625486242192]}
3. Window State的读取
  • Window 作业
// 执行代码
.....
.window(TumblingProcessingTimeWindows.of(Time.minutes(5)))
                .aggregate(new ClickCounter())
                .uid("click-window")
                .print();
/** A single click event: the user that clicked and when the click happened. */
static class Click {
        public String userId;
        public LocalDateTime time;

        /**
         * @param f0 value stored as the user id
         * @param localDateTime value stored as the click time
         */
        public Click(String f0, LocalDateTime localDateTime) {
            time = localDateTime;
            userId = f0;
        }
    }
//做一个简单的数据累计
    static class ClickCounter implements AggregateFunction<Click, Integer, Integer> {
        @Override
        public Integer createAccumulator() {
            return 0;
        }
        @Override
        public Integer add(Click value, Integer accumulator) {
            return 1 + accumulator;
        }
        @Override
        public Integer getResult(Integer accumulator) {
            return accumulator;
        }
        @Override
        public Integer merge(Integer a, Integer b) {
            return a + b;
        }
    }
  • Window State的读取
/**
 * Loads a checkpoint, reads the window state of the operator with uid "click-window"
 * and prints one line per key and window.
 */
public static void main(String[] args) throws Exception {
        final String checkpointPath =
                "file:///D:\\IDEAspaces\\bigdata_study\\bigdata_flink\\data\\checkpoint\\e2e118e23e6c3f775c9d7f477d2b9272\\chk-1";

        ExecutionEnvironment env = ExecutionEnvironment.getExecutionEnvironment();
        ExistingSavepoint restored = Savepoint.load(env, checkpointPath, new MemoryStateBackend());

        // Read the window state. Arguments: operator uid, the aggregate function the job ran,
        // the state reader function, key type, accumulator type, and the reader's output type.
        DataSet<ClickState> windowStates = restored
                .window(TumblingProcessingTimeWindows.of(Time.minutes(5)))
                .aggregate(
                        "click-window",
                        new ClickCounter(),
                        new ClickReader(),
                        Types.STRING,
                        Types.INT,
                        TypeInformation.of(ClickState.class));

        // Format each entry as "<windowStart>--<windowEnd>:<userId>:<count>" and print it.
        FlatMapFunction<ClickState, Object> formatter = new FlatMapFunction<ClickState, Object>() {
            @Override
            public void flatMap(ClickState value, Collector<Object> out) throws Exception {
                String windowRange = value.window.getStart() + "--" + value.window.getEnd();
                out.collect(windowRange + ":" + value.userId + ":" + value.count);
            }
        };
        windowStates.flatMap(formatter).print();

        // Sample output for the data in the current window:
        // 1625628000000--1625628300000::1
        // 1625628000000--1625628300000:a:5
        // 1625628000000--1625628300000:b:2
    }

    /**
     * Window reader that turns the state of one key/window pair into a {@code ClickState}.
     */
    static class ClickReader extends WindowReaderFunction<Integer, ClickState, String, TimeWindow> {

        /**
         * Emits one {@code ClickState} for the given key and window.
         *
         * @param key the key of the window entry
         * @param context gives access to the window and its registered timers
         * @param elements the window contents; the first element is taken as the count
         * @param out collector receiving the resulting record
         */
        @Override
        public void readWindow(String key, Context<TimeWindow> context, Iterable<Integer> elements, Collector<ClickState> out) throws Exception {
            ClickState state = new ClickState();
            state.userId = key;
            state.count = elements.iterator().next();
            state.window = context.window();
            // The job assigns TumblingProcessingTimeWindows, so the pending window timers are
            // processing-time timers; asking for event-time timers would miss them.
            state.triggerTimers = context.registeredProcessingTimeTimers();

            out.collect(state);
        }
    }
    
    /** Plain holder for the state of one key/window pair, as emitted by ClickReader. */
    public static class ClickState {
        // Key of the window entry (ClickReader assigns the readWindow key here).
        public String userId;
        // First element of the window contents, i.e. the aggregated click count.
        public int count;
        // The window this record belongs to.
        public TimeWindow window;
        // Timer timestamps that ClickReader obtained from the reader context.
        public Set<Long> triggerTimers;
    }
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值