【flink学习笔记】【10】按键分区状态

1. 基本方式

自定义flatmap类,在open方法中获取状态。flatMap方法中更新状态。

/**
 * Rich flat-map function that remembers the most recently seen {@code Events} record in a
 * {@code ValueState} obtained from the runtime context.
 *
 * <p>For every input it prints the stored value, overwrites it with the incoming record and
 * prints the stored value again. Nothing is emitted to the collector.
 */
public static class MyFlatMap extends RichFlatMapFunction<Events, String> {
    /** Handle to the value state; assigned in {@link #open(Configuration)}. */
    ValueState<Events> myStatu;

    /**
     * Looks up the value state registered under the name {@code "my-statu"}.
     *
     * @param parameters configuration passed in by the framework (unused here)
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        ValueStateDescriptor<Events> descriptor =
                new ValueStateDescriptor<Events>("my-statu", Events.class);
        myStatu = getRuntimeContext().getState(descriptor);
    }

    /**
     * Reads, replaces and re-reads the stored value, printing it before and after the update.
     *
     * @param events    incoming record; becomes the new stored value
     * @param collector output collector (unused)
     */
    @Override
    public void flatMap(Events events, Collector<String> collector) throws Exception {
        // Value held before this record arrived.
        Events before = myStatu.value();
        System.out.println(before);

        myStatu.update(events);

        // Read back through the state handle to show the update took effect.
        Events after = myStatu.value();
        System.out.println("my value" + after);
    }
}

2. 其他状态

keyedState处理流程
先注册描述器
调用富函数的getRuntimeContext
使用状态需要定义成属性(声明)
根据状态类型调用对应的获取方法:getState / getListState / getMapState / getReducingState / getAggregatingState(在open生命周期方法中创建)
状态读写操作
清空状态

/**
 * Rich flat-map function that exercises five kinds of state obtained from the runtime context:
 * value, list, map, reducing and aggregating state.
 *
 * <p>Every incoming {@code Events} record updates all five states and the current contents are
 * printed to standard output. Nothing is emitted to the collector.
 */
public static class MyFlatMap extends RichFlatMapFunction<Events, String> {
    /** Most recently seen record. */
    ValueState<Events> myStatu;
    /** Every record seen so far. */
    ListState<Events> myListStatu;
    /** Number of records seen per user name. */
    MapState<String, Long> myMapStatu;
    /** Reduced record: user/url of the accumulated value, timestamp of the latest record. */
    ReducingState<Events> myReducingStatu;
    /** Record counter rendered as a string ({@code "count" + n}). */
    AggregatingState<Events, String> myAggStatu;

    /**
     * Creates all state handles. The reducing and aggregating descriptors additionally need
     * their combining functions.
     *
     * @param parameters configuration passed in by the framework (unused here)
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        myStatu = getRuntimeContext()
                .getState(new ValueStateDescriptor<Events>("my-statu", Events.class));
        myListStatu = getRuntimeContext()
                .getListState(new ListStateDescriptor<Events>("my-list", Events.class));
        myMapStatu = getRuntimeContext()
                .getMapState(new MapStateDescriptor<String, Long>("my-Map", String.class, Long.class));

        // Reducing state needs a ReduceFunction describing how two records are combined.
        myReducingStatu = getRuntimeContext()
                .getReducingState(new ReducingStateDescriptor<Events>("MY-reduce",
                        new ReduceFunction<Events>() {
                            @Override
                            public Events reduce(Events events, Events t1) throws Exception {
                                // Keep user/url of the accumulated value, take the newest timestamp.
                                return new Events(events.user, events.url, t1.timestamp);
                            }
                        }, Events.class));

        myAggStatu = getRuntimeContext()
                .getAggregatingState(new AggregatingStateDescriptor<Events, Long, String>(
                        "my-agg",
                        new AggregateFunction<Events, Long, String>() {
                            @Override
                            public Long createAccumulator() {
                                return 0L;
                            }

                            @Override
                            public Long add(Events events, Long aLong) {
                                return aLong + 1;
                            }

                            @Override
                            public String getResult(Long aLong) {
                                return "count" + aLong;
                            }

                            @Override
                            public Long merge(Long aLong, Long acc1) {
                                // Merging two partial counts is their sum; returning null here
                                // would break any caller that merges accumulators.
                                return aLong + acc1;
                            }
                        }, Long.class));
    }

    /**
     * Updates every state with the incoming record and prints the resulting contents.
     *
     * @param events    incoming record
     * @param collector output collector (unused)
     */
    @Override
    public void flatMap(Events events, Collector<String> collector) throws Exception {
        // Value state: show the previous value, overwrite it, show the new value.
        System.out.println(myStatu.value());
        myStatu.update(events);
        System.out.println("my value" + myStatu.value());

        myListStatu.add(events);

        // Map state: one lookup, and the first record of a user counts as 1 (not 0) so the
        // counter agrees with the aggregating counter below.
        Long previousCount = myMapStatu.get(events.user);
        myMapStatu.put(events.user, previousCount == null ? 1L : previousCount + 1);
        System.out.println("my map value " + events.user + "" + myMapStatu.get(events.user));

        myAggStatu.add(events);
        System.out.println(myAggStatu.get());

        myReducingStatu.add(events);
        System.out.println("reduce statu" + events.user + "" + myReducingStatu.get());
    }
}

3. 值状态

案例:周期性输出pv——不开窗定时统计PV。

// Per-user visit count: key the stream by user, let PeriodPVResult process each key's
// records, and print whatever it emits.
stream
        .keyBy(event -> event.user)
        .process(new PeriodPVResult())
        .print();

process function

/**
 * Keyed process function that counts records per key and reports the running count from an
 * event-time timer that re-arms itself every ten seconds.
 *
 * <p>The count is never reset, so each report is cumulative.
 */
public static class PeriodPVResult extends KeyedProcessFunction<String, Events, String> {
    /** Distance between two reports, in milliseconds of event time. */
    private static final long REPORT_INTERVAL_MS = 10 * 1000L;

    /** Running number of records seen for the current key. */
    ValueState<Long> countState;
    /** Timestamp of the pending timer; {@code null} when no timer is registered. */
    ValueState<Long> timerState;

    /**
     * Obtains the two value states ({@code "count"} and {@code "timer"}).
     *
     * @param parameters configuration passed in by the framework (unused here)
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        ValueStateDescriptor<Long> countDescriptor = new ValueStateDescriptor<Long>("count", Long.class);
        ValueStateDescriptor<Long> timerDescriptor = new ValueStateDescriptor<Long>("timer", Long.class);
        countState = getRuntimeContext().getState(countDescriptor);
        timerState = getRuntimeContext().getState(timerDescriptor);
    }

    /**
     * Increments the count and, if no timer is pending for this key, registers one ten seconds
     * after the record's timestamp.
     *
     * @param events    incoming record
     * @param context   gives access to the timer service
     * @param collector output collector (unused here; output happens in {@code onTimer})
     */
    @Override
    public void processElement(Events events, Context context, Collector<String> collector) throws Exception {
        // Every record bumps the counter; a missing value means this is the first record.
        Long seenSoFar = countState.value();
        if (seenSoFar == null) {
            countState.update(1L);
        } else {
            countState.update(seenSoFar + 1);
        }

        // A timer is already pending: nothing more to do.
        if (timerState.value() != null) {
            return;
        }
        long fireAt = events.timestamp + REPORT_INTERVAL_MS;
        context.timerService().registerEventTimeTimer(fireAt);
        timerState.update(fireAt);
    }

    /**
     * Emits {@code key + "pv" + count} and schedules the next report.
     *
     * @param timestamp time the firing timer was registered for
     * @param ctx       gives access to the current key and the timer service
     * @param out       receives the report line
     */
    @Override
    public void onTimer(long timestamp, OnTimerContext ctx, Collector<String> out) throws Exception {
        String report = ctx.getCurrentKey() + "pv" + countState.value();
        out.collect(report);

        // Forget the timer that just fired. The count is deliberately kept so later reports
        // continue from the previous total.
        timerState.clear();

        long nextFireAt = timestamp + REPORT_INTERVAL_MS;
        ctx.timerService().registerEventTimeTimer(nextFireAt);
        timerState.update(nextFireAt);
    }
}

4.列表状态

列表状态:把两条流中已到达的数据分别保存在各自的列表状态中。
案例:双流join

// Full join of two keyed streams: every element is paired with all elements the OTHER stream
// has delivered so far for the same key, then buffered in its own list state.
stream.keyBy(data -> data.f0)
        .connect(stream2.keyBy(data -> data.f0))
        .process(new CoProcessFunction<Tuple3<String, String, Long>, Tuple3<String, String, Long>, String>() {
            // Elements received so far from the first (left) stream.
            private ListState<Tuple3<String, String, Long>> streamListState;
            // Elements received so far from the second (right) stream.
            private ListState<Tuple3<String, String, Long>> stream2ListState2;

            /** Creates one list state per input stream. */
            @Override
            public void open(Configuration parameters) throws Exception {
                streamListState = getRuntimeContext().getListState(
                        new ListStateDescriptor<Tuple3<String, String, Long>>(
                                "stream1.list", Types.TUPLE(Types.STRING, Types.STRING, Types.LONG)));
                stream2ListState2 = getRuntimeContext().getListState(
                        new ListStateDescriptor<Tuple3<String, String, Long>>(
                                "stream2.list", Types.TUPLE(Types.STRING, Types.STRING, Types.LONG)));
            }

            /** Pairs a left element with every buffered right element, then buffers it. */
            @Override
            public void processElement1(Tuple3<String, String, Long> left, Context context, Collector<String> collector) throws Exception {
                for (Tuple3<String, String, Long> right : stream2ListState2.get()) {
                    collector.collect("left:" + left.f0 + "" + left.f2 + "=>" + right);
                }
                streamListState.add(Tuple3.of(left.f0, left.f1, left.f2));
            }

            /** Pairs a right element with every buffered left element, then buffers it. */
            @Override
            public void processElement2(Tuple3<String, String, Long> right, Context context, Collector<String> collector) throws Exception {
                // Must read the LEFT buffer here; reading the right buffer would pair the
                // element with earlier elements of its own stream and miss the left ones.
                for (Tuple3<String, String, Long> left : streamListState.get()) {
                    collector.collect("right:" + right.f0 + "" + right.f2 + "=>" + left);
                }
                stream2ListState2.add(Tuple3.of(right.f0, right.f1, right.f2));
            }
        }
        ).print();

5.映射状态

mapstate 模拟滚动窗口。

// Key the stream by url and count records per 10000 ms window with FakeWindowResult.
stream
        .keyBy(event -> event.url)
        .process(new FakeWindowResult(10000L))
        .print();
/**
 * Keyed process function that imitates a tumbling window using map state: the map holds one
 * counter per window start, and an event-time timer at {@code windowEnd - 1} emits and removes
 * the counter of the finished window.
 */
public static class FakeWindowResult extends KeyedProcessFunction<String, Events, String> {
    /** Window length, in the same unit as {@code Events.timestamp}. */
    private Long windowSize;

    /** Window start -> number of records that fell into that window. */
    MapState<Long, Long> windowUrlCountMapState;

    /**
     * @param windowSize length of each window
     */
    public FakeWindowResult(Long windowSize) {
        this.windowSize = windowSize;
    }

    /**
     * Obtains the map state registered as {@code "WINDOW-COUNT"}.
     *
     * @param parameters configuration passed in by the framework (unused here)
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        MapStateDescriptor<Long, Long> descriptor =
                new MapStateDescriptor<Long, Long>("WINDOW-COUNT", Long.class, Long.class);
        windowUrlCountMapState = getRuntimeContext().getMapState(descriptor);
    }

    /**
     * Assigns the record to its window, makes sure a timer exists for the window's end and
     * increments that window's counter.
     *
     * @param events    incoming record
     * @param context   gives access to the timer service
     * @param collector output collector (unused here; output happens in {@code onTimer})
     */
    @Override
    public void processElement(Events events, Context context, Collector<String> collector) throws Exception {
        // Window assignment: round the timestamp down to a multiple of the window size.
        long start = events.timestamp / windowSize * windowSize;
        long end = start + windowSize;

        // Timer fires at the last timestamp that still belongs to the window.
        context.timerService().registerEventTimeTimer(end - 1);

        // Incremental aggregation: bump the existing counter or start a new one at 1.
        long updated = windowUrlCountMapState.contains(start)
                ? windowUrlCountMapState.get(start) + 1
                : 1L;
        windowUrlCountMapState.put(start, updated);
    }

    /**
     * Emits the count of the window ending at {@code timestamp + 1} and drops its map entry.
     *
     * @param timestamp time the firing timer was registered for ({@code windowEnd - 1})
     * @param ctx       gives access to the current key
     * @param out       receives the result line
     */
    @Override
    public void onTimer(long timestamp, OnTimerContext ctx, Collector<String> out) throws Exception {
        // Recover the window bounds from the timer timestamp.
        long end = timestamp + 1;
        long start = end - windowSize;
        Long total = windowUrlCountMapState.get(start);

        String line = "窗口" + new Timestamp(start) + "-" + new Timestamp(end)
                + "url" + ctx.getCurrentKey()
                + "count" + total;
        out.collect(line);

        // Simulate closing the window: its entry is no longer needed.
        windowUrlCountMapState.remove(start);
    }
}

6.聚合状态

案例:每隔5个数据统计平均时间戳检验疏密程度

// Key the stream by user and report the average timestamp after every 5 records.
stream
        .keyBy(event -> event.user)
        .flatMap(new AvgTsResult(5L))
        .print();
/**
 * Rich flat-map function that emits the average timestamp of every {@code count} records
 * (per key) and then starts over.
 *
 * <p>An aggregating state keeps {@code (sum of timestamps, number of records)} and a value
 * state keeps the number of records seen since the last output.
 */
public static class AvgTsResult extends RichFlatMapFunction<Events, String> {
    /** Number of records after which a result is emitted. */
    private Long count;

    /**
     * @param count number of records per emitted average
     */
    public AvgTsResult(Long count) {
        this.count = count;
    }

    /** Aggregating state holding the running average of timestamps. */
    AggregatingState<Events, Long> avgTsAggState;

    /** Value state holding the number of records seen since the last output. */
    ValueState<Long> countState;

    /**
     * Creates the aggregating state ({@code "avg-ts"}) and the counter state ({@code "count"}).
     *
     * @param parameters configuration passed in by the framework (unused here)
     */
    @Override
    public void open(Configuration parameters) throws Exception {
        avgTsAggState = getRuntimeContext().getAggregatingState(new AggregatingStateDescriptor<Events, Tuple2<Long, Long>, Long>(
                "avg-ts",
                new AggregateFunction<Events, Tuple2<Long, Long>, Long>() {
                    @Override
                    public Tuple2<Long, Long> createAccumulator() {
                        return Tuple2.of(0L, 0L);
                    }

                    @Override
                    public Tuple2<Long, Long> add(Events events, Tuple2<Long, Long> longLongTuple2) {
                        // f0 accumulates the timestamps, f1 counts the records.
                        return Tuple2.of(longLongTuple2.f0 + events.timestamp, longLongTuple2.f1 + 1);
                    }

                    @Override
                    public Long getResult(Tuple2<Long, Long> longLongTuple2) {
                        return longLongTuple2.f0 / longLongTuple2.f1;
                    }

                    @Override
                    public Tuple2<Long, Long> merge(Tuple2<Long, Long> longLongTuple2, Tuple2<Long, Long> acc1) {
                        // Combine two partial accumulators by adding sums and counts; returning
                        // null here would break any caller that merges accumulators.
                        return Tuple2.of(longLongTuple2.f0 + acc1.f0, longLongTuple2.f1 + acc1.f1);
                    }
                }
                , Types.TUPLE(Types.LONG, Types.LONG)

        ));

        countState = getRuntimeContext().getState(new ValueStateDescriptor<Long>("count", Long.class));
    }

    /**
     * Adds the record to both states and, once {@code count} records have been seen, emits the
     * average timestamp and clears both states.
     *
     * @param events    incoming record
     * @param collector receives the result line when the threshold is reached
     */
    @Override
    public void flatMap(Events events, Collector<String> collector) throws Exception {
        // Each record increases the counter; a missing value means this is the first record.
        Long currCount = countState.value();
        if (currCount == null) {
            currCount = 1L;
        } else {
            currCount++;
        }

        // Persist the new counter and fold the record into the average.
        countState.update(currCount);
        avgTsAggState.add(events);

        // Threshold reached: output the result and reset for the next batch.
        if (currCount.equals(count)) {
            collector.collect(events.user + "过去" + count + "次" + "访问平均时间戳是:" + avgTsAggState.get());
            countState.clear();
            avgTsAggState.clear();
        }
    }
}

7.状态生存时间(TTL)

如果状态的存在时间已经超过TTL,就将它清除(clear)。

// TTL policy: state expires one hour after its TTL timestamp was last refreshed.
StateTtlConfig ttlConfig = StateTtlConfig
        .newBuilder(Time.hours(1))
        // OnReadAndWrite: the TTL timestamp is refreshed on reads as well as on writes.
        .setUpdateType(StateTtlConfig.UpdateType.OnReadAndWrite)
        // ReturnExpiredIfNotCleanedUp: an expired value may still be returned as long as it
        // has not been cleaned up yet.
        .setStateVisibility(StateTtlConfig.StateVisibility.ReturnExpiredIfNotCleanedUp)
        .build();

// Attach the TTL policy to the value state descriptor.
ValueStateDescriptor<Events> valueStateDescriptor =
        new ValueStateDescriptor<Events>("my-statu", Events.class);
valueStateDescriptor.enableTimeToLive(ttlConfig);
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值