flink水位线案例

前言:

        结合上一篇水位线的知识点,针对下面的例题给出如下代码,仅供参考。

例题:

1.创建Flink流处理环境。

// Create the stream execution environment that the rest of the job is built on.
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();


// Trigger execution of the job defined on env.
env.execute();

2.从“access.txt”文件中获取数据源。

        数据源文件“access.txt”请在本文附带的资源中查找。

3.从中过滤出包含有page字段的数据。

// Keep only complete page records, then convert each JSON line into a UserEvent.
        SingleOutputStreamOperator<UserEvent> stream1 = stream.filter(new FilterFunction<String>() {
            /**
             * Accepts a line only when it parses to a JSON object that carries a "page"
             * object, a "common" object and a "ts" value.
             *
             * @param value one raw line of the input, expected to be a JSON string
             * @return true if the line has everything the map step below reads
             */
            @Override
            public boolean filter(String value) throws Exception {
                JSONObject jsonObject = JSONObject.parseObject(value);
                // parseObject can return null (e.g. for a blank line); dereferencing it
                // would fail the whole job with a NullPointerException.
                if (jsonObject == null) {
                    return false;
                }
                // "ts" is required too: it becomes the event timestamp later on, and a
                // missing value would surface there as a NullPointerException.
                return jsonObject.getJSONObject("page") != null
                        && jsonObject.getJSONObject("common") != null
                        && jsonObject.getLong("ts") != null;
            }
        }).map(new MapFunction<String, UserEvent>() {
            /**
             * Builds a UserEvent from a line that passed the filter above.
             *
             * @param value a JSON string containing "common", "page" and "ts"
             * @return UserEvent(common.uid, page.page_id, ts)
             */
            @Override
            public UserEvent map(String value) throws Exception {
                JSONObject jsonObject = JSONObject.parseObject(value);
                // user id lives under "common"
                String uid = jsonObject.getJSONObject("common").getString("uid");
                // page id lives under "page"
                String pageId = jsonObject.getJSONObject("page").getString("page_id");
                // top-level "ts" field
                Long ts = jsonObject.getLong("ts");
                return new UserEvent(uid, pageId, ts);
            }
        });

4.设置时间戳及单调递增水位线。

// Monotonously-increasing watermark strategy; the event timestamp is read from UserEvent.getTs().
WatermarkStrategy<UserEvent> ascendingTsStrategy = WatermarkStrategy
        .<UserEvent>forMonotonousTimestamps()
        .withTimestampAssigner((userEvent, recordTs) -> userEvent.getTs());
SingleOutputStreamOperator<UserEvent> stream2 = stream1.assignTimestampsAndWatermarks(ascendingTsStrategy);

5.统计每十秒钟的页面浏览量pv,以及窗口开始时间和结束时间,并输出至控制台。

// PV per 10-second tumbling event-time window over the whole (non-keyed) stream.
SingleOutputStreamOperator<Tuple3<String, String, Long>> stream1001 = stream2
                .windowAll(TumblingEventTimeWindows.of(Time.seconds(10)))
                .process(new ProcessAllWindowFunction<UserEvent, Tuple3<String, String, Long>, TimeWindow>() {
                    /**
                     * Emits one (window start, window end, pv) tuple for the window being evaluated.
                     *
                     * @param context   gives access to the window
                     * @param iterable  the events collected for this window
                     * @param collector receives the result tuple
                     * @throws Exception declared by the overridden method
                     */
                    @Override
                    public void process(Context context, Iterable<UserEvent> iterable, Collector<Tuple3<String, String, Long>> collector) throws Exception {
                        // each event counts as one page view
                        long pageViews = 0L;
                        for (UserEvent ignored : iterable) {
                            pageViews++;
                        }
                        // window bounds rendered as readable date strings
                        final String pattern = "yyyy-MM-dd HH:mm:ss";
                        TimeWindow currentWindow = context.window();
                        String windowStart = DateFormatUtils.format(currentWindow.getStart(), pattern);
                        String windowEnd = DateFormatUtils.format(currentWindow.getEnd(), pattern);
                        collector.collect(Tuple3.of(windowStart, windowEnd, pageViews));
                    }
                });
        stream1001.print();

6.统计每十秒钟的独立用户访客量uv,以及窗口开始时间和结束时间,并输出至控制台。

// UV per 10-second window, in two steps:
//   1) per user and window, collapse all of that user's events into one (start, end, 1) marker;
//   2) per window, add the markers up exactly once.
SingleOutputStreamOperator<Tuple3<String, String, Long>> stream1002 = stream2
                .keyBy(value -> value.getUid())
                .window(TumblingEventTimeWindows.of(Time.seconds(10)))
                .process(new ProcessWindowFunction<UserEvent, Tuple3<String, String, Long>, String, TimeWindow>() {
                    /**
                     * Emits a single (window start, window end, 1) marker for the user of this key,
                     * no matter how many events that user produced in the window.
                     *
                     * @param s         the key, i.e. the uid
                     * @param context   gives access to the window
                     * @param iterable  this user's events in the window (not inspected)
                     * @param collector receives the marker tuple
                     * @throws Exception declared by the overridden method
                     */
                    @Override
                    public void process(String s, Context context, Iterable<UserEvent> iterable, Collector<Tuple3<String, String, Long>> collector) throws Exception {
                        TimeWindow window = context.window();
                        String strStart = DateFormatUtils.format(window.getStart(), "yyyy-MM-dd HH:mm:ss");
                        String strEnd = DateFormatUtils.format(window.getEnd(), "yyyy-MM-dd HH:mm:ss");
                        // one distinct user seen in this window
                        collector.collect(Tuple3.of(strStart, strEnd, 1L));
                    }
                })
                .keyBy(value -> value.f0)
                // A bare keyBy(...).sum(2) is a rolling aggregate and would emit every partial
                // count (1, 2, 3, ...). Results of a window operator keep a timestamp inside the
                // window that produced them, so the same tumbling window groups the markers
                // again and sum(2) emits only the final UV once per window.
                .window(TumblingEventTimeWindows.of(Time.seconds(10)))
                .sum(2);
        stream1002.print();

7.根据窗口开始和结束时间,使用join或者union将pv,uv数据进行汇总。

// Pair PV and UV results that share the same window-start string, within a 10-second window.
stream1001.join(stream1002)
                .where(pvTuple -> pvTuple.f0)
                .equalTo(uvTuple -> uvTuple.f0)
                .window(TumblingEventTimeWindows.of(Time.seconds(10)))
                .apply(new JoinFunction<Tuple3<String, String, Long>, Tuple3<String, String, Long>, String>() {
                    /**
                     * Renders one matched pair as a single report line.
                     *
                     * @param pvTuple (start, end, pv) from the PV stream
                     * @param uvTuple (start, end, uv) from the UV stream
                     * @return the formatted line
                     */
                    @Override
                    public String join(Tuple3<String, String, Long> pvTuple, Tuple3<String, String, Long> uvTuple) throws Exception {
                        StringBuilder line = new StringBuilder();
                        line.append("开始时间:").append(pvTuple.f0);
                        line.append("结束时间:").append(pvTuple.f1);
                        line.append(",PV=").append(pvTuple.f2);
                        line.append(",UV=").append(uvTuple.f2);
                        return line.toString();
                    }
                }).print();

 8.统计每十秒钟较前十秒PV的变化,如果增加则输出正数,减少则输出负数

// Written as a separate window function; use it via .process(new pvSumFunction()).
/**
 * Reports, for each window, its PV and the difference to the PV stored by the previously
 * processed window (positive = increase, negative = decrease).
 * NOTE(review): the stored value is from the last window that was processed; whether that is
 * always the directly preceding 10 seconds depends on every window firing - confirm.
 */
public static class pvSumFunction extends ProcessAllWindowFunction<UserEvent, String, TimeWindow>{

        /** PV written by the previous invocation of process(); empty before the first one. */
        private transient ValueState<Long> lastPvValueState;

        /** Obtains the "lastPv" state handle from the runtime context. */
        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            lastPvValueState = getRuntimeContext().getState(
                    new ValueStateDescriptor<Long>("lastPv", Types.LONG));
        }

        /**
         * Counts the events of the window, compares with the stored PV, stores the new PV and
         * emits one report line.
         *
         * @param context   gives access to the window
         * @param iterable  the events collected for this window
         * @param collector receives the report line
         */
        @Override
        public void process(Context context, Iterable<UserEvent> iterable, Collector<String> collector) throws Exception {
            // PV of the current window
            long currentPv = 0L;
            for (UserEvent ignored : iterable) {
                currentPv++;
            }
            // no stored value yet means there is nothing to compare with: treat as 0
            Long stored = lastPvValueState.value();
            long previousPv = (stored == null) ? 0L : stored;
            long delta = currentPv - previousPv;
            // remember this window's PV for the next invocation
            lastPvValueState.update(currentPv);

            final String pattern = "yyyy-MM-dd HH:mm:ss";
            String windowStart = DateFormatUtils.format(context.window().getStart(), pattern);
            String windowEnd = DateFormatUtils.format(context.window().getEnd(), pattern);
            collector.collect("["+windowStart+"-->"+windowEnd+")的pv是:"+currentPv+"-->与上一个10秒的差值是"+delta);
        }
    }

9.统计每个十秒pv的前三值

/**
 * Keeps the PV of processed windows in map state and, every time a window fires, emits the
 * three windows with the highest PV seen so far.
 */
public static class pvTopNFunction extends ProcessAllWindowFunction<UserEvent, String, TimeWindow>{

        /** window start (epoch millis) -> PV of that window; pruned to the current top 3. */
        private transient MapState<Long,Long> pvMapState;

        /** Obtains the "pvMapState" state handle from the runtime context. */
        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            pvMapState = getRuntimeContext().getMapState(new MapStateDescriptor<Long, Long>(
                    "pvMapState",
                    Types.LONG,
                    Types.LONG
            ));
        }

        /**
         * Counts the events of the window, records the count, and emits the current top 3.
         *
         * @param context   gives access to the window
         * @param iterable  the events collected for this window
         * @param collector receives the formatted top-3 report
         */
        @Override
        public void process(Context context, Iterable<UserEvent> iterable, Collector<String> collector) throws Exception {
            // primitive counter: no boxing on every increment
            long pv = 0L;
            for (UserEvent ignored : iterable) {
                pv++;
            }
            // state is keyed by the window's start timestamp
            pvMapState.put(context.window().getStart(), pv);

            // copy the state into a plain map so it can be sorted
            Map<Long, Long> pvMap = new HashMap<>();
            for (Map.Entry<Long, Long> entry : pvMapState.entries()) {
                pvMap.put(entry.getKey(), entry.getValue());
            }

            // Highest PV first. Equal PVs are ordered by earlier window start so the result
            // does not depend on HashMap iteration order.
            List<Map.Entry<Long, Long>> ranked = pvMap.entrySet().stream()
                    .sorted(Map.Entry.<Long, Long>comparingByValue(Comparator.reverseOrder())
                            .thenComparing(Map.Entry.comparingByKey()))
                    .collect(Collectors.toList());
            int keep = Math.min(3, ranked.size());

            // A window that is outside the top 3 now can never re-enter it, so drop it;
            // otherwise the state grows by one entry per window forever.
            for (Map.Entry<Long, Long> dropped : ranked.subList(keep, ranked.size())) {
                pvMapState.remove(dropped.getKey());
            }

            // render the report
            StringBuilder rs = new StringBuilder("前三名是:\n");
            for (Map.Entry<Long, Long> top : ranked.subList(0, keep)) {
                String strStart = DateFormatUtils.format(top.getKey(), "yyyy-MM-dd HH:mm:ss");
                rs.append("开始时间是:").append(strStart).append(",pv是:").append(top.getValue()).append("\n");
            }
            collector.collect(rs.toString());
        }

    }

代码总结:

/**
 * Reads JSON access logs from a text file and prints, for every 10-second event-time window,
 * one line combining the page views (PV) and the unique visitors (UV) of that window.
 */
public class UvPvDemo1 {
    public static void main(String[] args) throws Exception {
        // create the stream execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // job parallelism
        env.setParallelism(5);
        // optional: switch the runtime to batch mode
//        env.setRuntimeMode(RuntimeExecutionMode.BATCH);
        // read the input file line by line
        DataStreamSource<String> stream = env.readTextFile("D:\\workidea\\Flink1001\\input\\access.txt");
        // keep only complete page records, then convert each JSON line into a UserEvent
        SingleOutputStreamOperator<UserEvent> stream1 = stream.filter(new FilterFunction<String>() {
            /**
             * Accepts a line only when it parses to a JSON object that carries a "page"
             * object, a "common" object and a "ts" value.
             */
            @Override
            public boolean filter(String value) throws Exception {
                JSONObject jsonObject = JSONObject.parseObject(value);
                // parseObject can return null (e.g. for a blank line); dereferencing it
                // would fail the whole job with a NullPointerException.
                if (jsonObject == null) {
                    return false;
                }
                // "ts" is required too: it becomes the event timestamp below, and a missing
                // value would surface there as a NullPointerException.
                return jsonObject.getJSONObject("page") != null
                        && jsonObject.getJSONObject("common") != null
                        && jsonObject.getLong("ts") != null;
            }
        }).map(new MapFunction<String, UserEvent>() {
            /** Builds UserEvent(common.uid, page.page_id, ts) from a line that passed the filter. */
            @Override
            public UserEvent map(String value) throws Exception {
                JSONObject jsonObject = JSONObject.parseObject(value);
                String uid = jsonObject.getJSONObject("common").getString("uid");
                String pageId = jsonObject.getJSONObject("page").getString("page_id");
                Long ts = jsonObject.getLong("ts");
                return new UserEvent(uid, pageId, ts);
            }
        });

        // monotonously-increasing watermarks; event time is taken from UserEvent.getTs()
        SingleOutputStreamOperator<UserEvent> stream2 = stream1.assignTimestampsAndWatermarks(
                WatermarkStrategy.<UserEvent>forMonotonousTimestamps()
                        .withTimestampAssigner((event, ts) -> event.getTs())
        );

        // PV per 10-second tumbling window over the whole stream
        SingleOutputStreamOperator<Tuple3<String, String, Long>> stream1001 = stream2.windowAll(TumblingEventTimeWindows.of(Time.seconds(10)))
                .process(new ProcessAllWindowFunction<UserEvent, Tuple3<String, String, Long>, TimeWindow>() {
                    /**
                     * Emits one (window start, window end, pv) tuple for the window.
                     *
                     * @param context   gives access to the window
                     * @param iterable  the events collected for this window
                     * @param collector receives the result tuple
                     * @throws Exception declared by the overridden method
                     */
                    @Override
                    public void process(Context context, Iterable<UserEvent> iterable, Collector<Tuple3<String, String, Long>> collector) throws Exception {
                        TimeWindow window = context.window();
                        // each event counts as one page view
                        long pv = 0L;
                        for (UserEvent ignored : iterable) {
                            pv++;
                        }
                        // window bounds rendered as readable date strings
                        String strStart = DateFormatUtils.format(window.getStart(), "yyyy-MM-dd HH:mm:ss");
                        String strEnd = DateFormatUtils.format(window.getEnd(), "yyyy-MM-dd HH:mm:ss");
                        collector.collect(Tuple3.of(strStart, strEnd, pv));
                    }
                });
        // uncomment to inspect the PV stream on its own
//        stream1001.print();

        // UV per 10-second window, in two steps:
        //   1) per user and window, emit one (start, end, 1) marker;
        //   2) per window, add the markers up exactly once.
        SingleOutputStreamOperator<Tuple3<String, String, Long>> stream1002 = stream2.keyBy(value -> value.getUid()).window(TumblingEventTimeWindows.of(Time.seconds(10)))
                .process(new ProcessWindowFunction<UserEvent, Tuple3<String, String, Long>, String, TimeWindow>() {
                    /**
                     * Emits a single (window start, window end, 1) marker for the user of this
                     * key, no matter how many events that user produced in the window.
                     *
                     * @param s         the key, i.e. the uid
                     * @param context   gives access to the window
                     * @param iterable  this user's events in the window (not inspected)
                     * @param collector receives the marker tuple
                     * @throws Exception declared by the overridden method
                     */
                    @Override
                    public void process(String s, Context context, Iterable<UserEvent> iterable, Collector<Tuple3<String, String, Long>> collector) throws Exception {
                        TimeWindow window = context.window();
                        String strStart = DateFormatUtils.format(window.getStart(), "yyyy-MM-dd HH:mm:ss");
                        String strEnd = DateFormatUtils.format(window.getEnd(), "yyyy-MM-dd HH:mm:ss");
                        // one distinct user seen in this window
                        collector.collect(Tuple3.of(strStart, strEnd, 1L));
                    }
                })
                .keyBy(value -> value.f0)
                // A bare keyBy(...).sum(2) is a rolling aggregate: it would emit every partial
                // count (1, 2, 3, ...) and the join below would print one line per partial
                // count. Window results keep a timestamp inside the window that produced them,
                // so the same tumbling window groups the markers again and sum(2) emits only
                // the final UV once per window.
                .window(TumblingEventTimeWindows.of(Time.seconds(10)))
                .sum(2);
        // uncomment to inspect the UV stream on its own
//        stream1002.print();

        // combine PV and UV of the same window (matched on the window-start string)
        stream1001.join(stream1002)
                .where(value1 -> value1.f0)
                .equalTo(value2 -> value2.f0)
                .window(TumblingEventTimeWindows.of(Time.seconds(10)))
                .apply(new JoinFunction<Tuple3<String, String, Long>, Tuple3<String, String, Long>, String>() {
                    /** Renders one matched (pv tuple, uv tuple) pair as a single report line. */
                    @Override
                    public String join(Tuple3<String, String, Long> first, Tuple3<String, String, Long> second) throws Exception {
                        return "开始时间:"+first.f0+"结束时间:"+first.f1+",PV="+first.f2+",UV="+second.f2;
                    }
                }).print();



        env.execute();
    }
}
/**
 * Reads JSON access logs from a text file and runs one of two window functions on
 * 10-second event-time windows: {@link pvTopNFunction} (top-3 PV windows, the one wired in
 * below) or {@link pvSumFunction} (PV change versus the previously processed window).
 */
public class UvPvDemo2 {
    public static void main(String[] args) throws Exception {
        // create the stream execution environment
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // job parallelism
        env.setParallelism(1);
        // run in batch runtime mode
        env.setRuntimeMode(RuntimeExecutionMode.BATCH);
        // read the input file line by line
        DataStreamSource<String> stream = env.readTextFile("D:\\workidea\\Flink1001\\input\\access.txt");
        // keep only complete page records, then convert each JSON line into a UserEvent
        SingleOutputStreamOperator<UserEvent> stream1 = stream.filter(new FilterFunction<String>() {
            /**
             * Accepts a line only when it parses to a JSON object that carries a "page"
             * object, a "common" object and a "ts" value.
             */
            @Override
            public boolean filter(String value) throws Exception {
                JSONObject jsonObject = JSONObject.parseObject(value);
                // parseObject can return null (e.g. for a blank line); dereferencing it
                // would fail the whole job with a NullPointerException.
                if (jsonObject == null) {
                    return false;
                }
                // "ts" is required too: it becomes the event timestamp below, and a missing
                // value would surface there as a NullPointerException.
                return jsonObject.getJSONObject("page") != null
                        && jsonObject.getJSONObject("common") != null
                        && jsonObject.getLong("ts") != null;
            }
        }).map(new MapFunction<String, UserEvent>() {
            /** Builds UserEvent(common.uid, page.page_id, ts) from a line that passed the filter. */
            @Override
            public UserEvent map(String value) throws Exception {
                JSONObject jsonObject = JSONObject.parseObject(value);
                String uid = jsonObject.getJSONObject("common").getString("uid");
                String pageId = jsonObject.getJSONObject("page").getString("page_id");
                Long ts = jsonObject.getLong("ts");
                return new UserEvent(uid, pageId, ts);
            }
        });

        // monotonously-increasing watermarks; event time is taken from UserEvent.getTs()
        SingleOutputStreamOperator<UserEvent> stream2 = stream1.assignTimestampsAndWatermarks(
                WatermarkStrategy.<UserEvent>forMonotonousTimestamps()
                        .withTimestampAssigner((event, ts) -> event.getTs())
        );

        // Top-3 PV windows. For the "PV change versus the previous window" exercise,
        // pass new pvSumFunction() to process(...) instead.
        stream2
                // tumbling window
                .windowAll(TumblingEventTimeWindows.of(Time.seconds(10)))
                // sliding window alternative
//                .windowAll(SlidingEventTimeWindows.of(Time.seconds(10),Time.seconds(5)))
                // session window alternative; needs the inactivity gap that closes a session
//                .windowAll(EventTimeSessionWindows.withGap(Time.seconds(3)))
                .process(new pvTopNFunction()).print();

//        stream2.print();


        env.execute();
    }

    /**
     * Reports, for each window, its PV and the difference to the PV stored by the previously
     * processed window (positive = increase, negative = decrease).
     * NOTE(review): the stored value is from the last window that was processed; whether that
     * is always the directly preceding 10 seconds depends on every window firing - confirm.
     */
    public static class pvSumFunction extends ProcessAllWindowFunction<UserEvent, String, TimeWindow>{

        /** PV written by the previous invocation of process(); empty before the first one. */
        private transient ValueState<Long> lastPvValueState;

        /** Obtains the "lastPv" state handle from the runtime context. */
        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);

            ValueStateDescriptor<Long> descriptor = new ValueStateDescriptor<Long>(
                    "lastPv", // the state name
                    Types.LONG
            );
            lastPvValueState = getRuntimeContext().getState(descriptor);
        }

        /**
         * Counts the events of the window, compares with the stored PV, stores the new PV and
         * emits one report line.
         *
         * @param context   gives access to the window
         * @param iterable  the events collected for this window
         * @param collector receives the report line
         */
        @Override
        public void process(Context context, Iterable<UserEvent> iterable, Collector<String> collector) throws Exception {
            // PV of the current window (primitive counter: no boxing per increment)
            long pv = 0L;
            for (UserEvent ignored : iterable) {
                pv++;
            }
            // no stored value yet means there is nothing to compare with: treat as 0
            Long value = lastPvValueState.value();
            long lastPv = (value != null) ? value : 0L;
            long chaZhi = pv - lastPv;
            // remember this window's PV for the next invocation
            lastPvValueState.update(pv);
            String start = DateFormatUtils.format(context.window().getStart(), "yyyy-MM-dd HH:mm:ss");
            String end = DateFormatUtils.format(context.window().getEnd(), "yyyy-MM-dd HH:mm:ss");
            collector.collect("["+start+"-->"+end+")的pv是:"+pv+"-->与上一个10秒的差值是"+chaZhi);
        }
    }

    /**
     * Keeps the PV of processed windows in map state and, every time a window fires, emits
     * the three windows with the highest PV seen so far.
     */
    public static class pvTopNFunction extends ProcessAllWindowFunction<UserEvent, String, TimeWindow>{

        /** window start (epoch millis) -> PV of that window; pruned to the current top 3. */
        private transient MapState<Long,Long> pvMapState;

        /** Obtains the "pvMapState" state handle from the runtime context. */
        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);
            pvMapState = getRuntimeContext().getMapState(new MapStateDescriptor<Long, Long>(
                    "pvMapState",
                    Types.LONG,
                    Types.LONG
            ));
        }

        /**
         * Counts the events of the window, records the count, and emits the current top 3.
         *
         * @param context   gives access to the window
         * @param iterable  the events collected for this window
         * @param collector receives the formatted top-3 report
         */
        @Override
        public void process(Context context, Iterable<UserEvent> iterable, Collector<String> collector) throws Exception {
            // primitive counter: no boxing on every increment
            long pv = 0L;
            for (UserEvent ignored : iterable) {
                pv++;
            }
            // state is keyed by the window's start timestamp
            pvMapState.put(context.window().getStart(), pv);

            // copy the state into a plain map so it can be sorted
            Map<Long, Long> pvMap = new HashMap<>();
            for (Map.Entry<Long, Long> entry : pvMapState.entries()) {
                pvMap.put(entry.getKey(), entry.getValue());
            }

            // Highest PV first. Equal PVs are ordered by earlier window start so the result
            // does not depend on HashMap iteration order.
            List<Map.Entry<Long, Long>> ranked = pvMap.entrySet().stream()
                    .sorted(Map.Entry.<Long, Long>comparingByValue(Comparator.reverseOrder())
                            .thenComparing(Map.Entry.comparingByKey()))
                    .collect(Collectors.toList());
            int keep = Math.min(3, ranked.size());

            // A window that is outside the top 3 now can never re-enter it, so drop it;
            // otherwise the state grows by one entry per window forever.
            for (Map.Entry<Long, Long> dropped : ranked.subList(keep, ranked.size())) {
                pvMapState.remove(dropped.getKey());
            }

            // render the report
            StringBuilder rs = new StringBuilder("前三名是:\n");
            for (Map.Entry<Long, Long> top : ranked.subList(0, keep)) {
                String strStart = DateFormatUtils.format(top.getKey(), "yyyy-MM-dd HH:mm:ss");
                rs.append("开始时间是:").append(strStart).append(",pv是:").append(top.getValue()).append("\n");
            }
            collector.collect(rs.toString());
        }

    }
}

评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值