状态编程求窗口差值

package com.mz.iot.test;

import com.mz.iot.optional.MyContinuousEventTimeTrigger;
import com.mz.iot.utils.DateUtil;
import com.mz.iot.utils.FlinkUtils;
import com.mz.iot.utils.LogUtils;
import lombok.*;
import org.apache.commons.compress.utils.Lists;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.common.typeinfo.Types;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

import java.util.ArrayList;
import java.util.Comparator;
import java.util.Objects;


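/**
 * Computes the difference between the largest and the smallest element of a window.
 * Note: the "smallest element" used for the final figure is the previous window's maximum.
 */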
public class TestWindowDiff {

    /**
     * Builds and runs the streaming job: reads text lines from a socket, parses them into
     * {@link Event}s, aggregates them in 15-minute tumbling event-time windows per {@code kind},
     * and post-processes the per-firing window results in {@link MyKeyedProcessFunction}.
     *
     * @param args command-line arguments (not used)
     * @throws Exception if the job cannot be built or executed
     */
    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = FlinkUtils.createEnv("");

        /*
         * Input format, one record per line: kind,timestamp,value
         *
         * a,2020-10-01 00:12:08,1
         * a,2020-10-01 00:12:38,3
         * a,2020-10-01 00:13:08,5
         *
         * a,2020-10-01 00:16:08,10
         * a,2020-10-01 00:17:08,11
         *
         * a,2020-10-01 00:31:08,15
         * a,2020-10-01 00:31:12,18
         * a,2020-10-01 00:32:13,20
         *
         * a,2020-10-01 00:46:12,28
         * a,2020-10-01 00:49:13,38
         * a,2020-10-01 00:53:13,39
         *
         * a,2020-10-01 01:01:13,41
         */
        DataStream<String> socket = env.socketTextStream("mz-hadoop-03", 7777);
        socket.print("socket stream");

        // Stage 1: turn each comma-separated line into an Event.
        SingleOutputStreamOperator<Event> parsedEvents = socket.map(new MapFunction<String, Event>() {
            @Override
            public Event map(String line) throws Exception {
                String[] fields = line.split(",");
                String kind = fields[0].trim();
                String time = fields[1];
                int amount = Integer.parseInt(fields[2]);
                return new Event(kind, time, amount);
            }
        });

        // Stage 2: event time comes from the record's timestamp string; 5 s of out-of-orderness is tolerated.
        SingleOutputStreamOperator<Event> mainStream = parsedEvents.assignTimestampsAndWatermarks(
                new BoundedOutOfOrdernessTimestampExtractor<Event>(Time.seconds(5)) {
                    @Override
                    public long extractTimestamp(Event record) {
                        String eventTime = record.getTime();
                        return DateUtil.getMillsFromString(eventTime);
                    }
                });

        // Stage 3: per-kind 15-minute tumbling windows; the custom trigger is configured with a
        // 30 s interval (presumably it fires periodically inside the window - confirm in the trigger).
        SingleOutputStreamOperator<WindowResult> perFiringResults = mainStream
                .keyBy("kind")
                .window(TumblingEventTimeWindows.of(Time.minutes(15), Time.minutes(0)))
                .trigger(MyContinuousEventTimeTrigger.of(Time.seconds(30)))
                .aggregate(new TriggerAgg(), new DiffWindowResult());

        // Stage 4: re-key by kind so the results of successive windows meet in the same keyed state.
        SingleOutputStreamOperator<WindowResult> windowResultStream = perFiringResults
                .keyBy("kind")
                .process(new MyKeyedProcessFunction());

        windowResultStream.print("resultStream====>");

        env.execute("test window diff with socket stream");
    }


    /**
     * Post-processes the per-firing window results of one key.
     *
     * <p>Every incoming result is forwarded immediately with a provisional {@code useCnt}
     * (current value minus the window minimum) and is also buffered in keyed state. An
     * event-time timer set shortly after the window end then emits one corrected result for
     * that window, whose {@code useCnt} is the window maximum minus the previous window's
     * maximum (or minus the window's own minimum when no previous window has been seen).
     */
    public static class MyKeyedProcessFunction extends KeyedProcessFunction<Tuple, WindowResult, WindowResult> {

        /** Offset in milliseconds between a window's end and the timer that corrects it. */
        private static final long CORRECTION_DELAY_MS = 5000L;

        /**
         * Keyed state: both handles are scoped to the current key, so each key sees its own content.
         * listState buffers the results received so far; lastMaxState holds the maximum of the
         * most recently corrected window.
         */
        ListState<WindowResult> listState;
        ValueState<Integer> lastMaxState;

        /**
         * Obtains the two keyed state handles.
         *
         * @param parameters configuration passed by the runtime
         * @throws Exception if the state handles cannot be created
         */
        @Override
        public void open(Configuration parameters) throws Exception {
            super.open(parameters);

            // No default value is configured, so lastMaxState.value() is null until the first update.
            lastMaxState = getRuntimeContext().getState(new ValueStateDescriptor<>("last-max", Types.INT));
            listState = getRuntimeContext().getListState(new ListStateDescriptor<WindowResult>("item-result-list", WindowResult.class));

        }


        /**
         * Buffers the result, schedules the correction timer of its window and forwards the
         * result with a provisional (approximate) in-window difference.
         *
         * @param value result of one window firing
         * @param ctx   context giving access to the timer service
         * @param out   collector for the provisional result
         * @throws Exception if state access fails
         */
        @Override
        public void processElement(WindowResult value, Context ctx, Collector<WindowResult> out) throws Exception {
            // Buffer each distinct result only once (equality ignores useCnt).
            boolean alreadyBuffered = false;
            for (WindowResult buffered : listState.get()) {
                if (value.equals(buffered)) {
                    alreadyBuffered = true;
                    break;
                }
            }
            if (!alreadyBuffered) {
                listState.add(value);
            }

            // Registering the same timestamp repeatedly is harmless: timers are de-duplicated per key.
            ctx.timerService().registerEventTimeTimer(value.getWindowEnd() + CORRECTION_DELAY_MS);

            // Widen before subtracting so extreme values cannot overflow int.
            value.setUseCnt((long) value.getValue() - value.getMinValue());

            out.collect(value);
        }


        /**
         * Emits the corrected result of the window this timer belongs to.
         *
         * <p>Only buffered results whose window end matches the timer are consumed; results of
         * other windows stay buffered for their own timers. If nothing is buffered for the
         * window, nothing is emitted.
         *
         * @param timestamp firing time of the timer (window end plus the correction delay)
         * @param ctx       timer context
         * @param out       collector for the corrected result
         * @throws Exception if state access fails
         */
        @Override
        public void onTimer(long timestamp, OnTimerContext ctx, Collector<WindowResult> out) throws Exception {
            super.onTimer(timestamp, ctx, out);

            System.out.println("onTime() exec:" + DateUtil.getDateStrFromMill(timestamp));

            long firedWindowEnd = timestamp - CORRECTION_DELAY_MS;

            ArrayList<WindowResult> otherWindows = new ArrayList<>();
            WindowResult windowResultMax = null;

            for (WindowResult windowResult : listState.get()) {
                if (windowResult.getWindowEnd() != firedWindowEnd) {
                    // Belongs to a different window whose timer has not fired yet.
                    otherWindows.add(windowResult);
                    continue;
                }

                System.out.println("本窗口" + DateUtil.getDateStrFromMill(timestamp) + "所有数据:" + windowResult);

                // Strict '>' keeps the earliest buffered result among equal maxima.
                if (windowResultMax == null || windowResult.getMaxValue() > windowResultMax.getMaxValue()) {
                    windowResultMax = windowResult;
                }
            }

            // Drop the consumed results but keep those of the other windows.
            listState.clear();
            for (WindowResult pending : otherWindows) {
                listState.add(pending);
            }

            if (windowResultMax == null) {
                // Nothing buffered for this window: nothing to correct.
                return;
            }

            // Baseline: previous window's maximum, or this window's minimum for the very first window.
            Integer previousMax = lastMaxState.value();
            int lastV;
            if (previousMax == null) {
                lastV = windowResultMax.getMinValue();
                System.out.println("上个窗口状态为空,最大值取最小值:" + lastV);
            } else {
                lastV = previousMax;
                System.out.println("上个窗口状态不为空,取最大值从状态取:" + lastV);
            }
            lastMaxState.update(windowResultMax.getMaxValue());

            // Corrected difference, computed in long to avoid int overflow.
            windowResultMax.setUseCnt((long) windowResultMax.getMaxValue() - lastV);
            out.collect(windowResultMax);
        }
    }


    /**
     * Incremental aggregate that produces the data returned by one trigger firing: the key,
     * the most recently added value, and the running maximum and minimum.
     */
    public static class TriggerAgg implements AggregateFunction<Event, WindowResult, WindowResult> {

        /**
         * Creates an empty accumulator whose extremes are sentinels, so that the first added
         * value becomes both the maximum and the minimum.
         *
         * @return a fresh accumulator
         */
        @Override
        public WindowResult createAccumulator() {

            WindowResult windowResult = new WindowResult();
            windowResult.setMinValue(Integer.MAX_VALUE);
            windowResult.setMaxValue(Integer.MIN_VALUE);
            return windowResult;
        }


        /**
         * Folds one event into the accumulator.
         *
         * @param value       the incoming event
         * @param accumulator the accumulator to update
         * @return the same accumulator instance, updated
         */
        @Override
        public WindowResult add(Event value, WindowResult accumulator) {

            System.out.println("value in acc:" + value);

            // Largest value accumulated so far.
            int max = Math.max(accumulator.getMaxValue(), value.getValue());

            // Smallest value accumulated so far; the window function later replaces it
            // with the minimum cached for the whole window.
            int min = Math.min(accumulator.getMinValue(), value.getValue());

            accumulator.setKind(value.getKind());
            accumulator.setValue(value.getValue());
            accumulator.setMaxValue(max);
            accumulator.setMinValue(min);

            return accumulator;
        }

        /**
         * Returns the accumulator itself as the result.
         *
         * @param accumulator the accumulator
         * @return the accumulator, unchanged
         */
        @Override
        public WindowResult getResult(WindowResult accumulator) {
            return accumulator;
        }

        /**
         * Combines two accumulators instead of returning {@code null}, so the function also
         * works with merging window assigners.
         *
         * <p>Maximum and minimum are combined exactly; the sentinels of an empty accumulator
         * drop out naturally. The order in which the two sides received their last element is
         * not known, so {@code kind} and the "latest" {@code value} are taken from {@code b}
         * when it has received at least one element and from {@code a} otherwise.
         *
         * @param a first accumulator
         * @param b second accumulator
         * @return a new accumulator holding the combined state
         */
        @Override
        public WindowResult merge(WindowResult a, WindowResult b) {
            WindowResult merged = new WindowResult();
            merged.setMaxValue(Math.max(a.getMaxValue(), b.getMaxValue()));
            merged.setMinValue(Math.min(a.getMinValue(), b.getMinValue()));

            // kind is only set by add(), so a null kind marks an accumulator that saw no element.
            WindowResult latest = b.getKind() != null ? b : a;
            merged.setKind(latest.getKind());
            merged.setValue(latest.getValue());
            return merged;
        }
    }


    /**
     * Window function that enriches the aggregate of one firing with the window bounds and
     * replaces its minimum with the smallest minimum seen so far in the same window, which
     * is cached in per-window state.
     */
    public static class DiffWindowResult extends ProcessWindowFunction<WindowResult, WindowResult, Tuple, TimeWindow> {

        /**
         * Processes one firing of a window.
         *
         * @param tuple    the window key
         * @param context  window context (bounds, watermark, per-window state)
         * @param elements the pre-aggregated result; only the first element is read
         * @param out      collector for the enriched result
         * @throws Exception if state access fails
         */
        @Override
        public void process(Tuple tuple, Context context, Iterable<WindowResult> elements, Collector<WindowResult> out) throws Exception {

            long windowStart = context.window().getStart();
            long windowEnd = context.window().getEnd();

            String t_start = DateUtil.getDateStrFromMill(windowStart);
            String t_end = DateUtil.getDateStrFromMill(windowEnd);

            String wm = DateUtil.getDateStrFromMill(context.currentWatermark());

            LogUtils.info("窗口边界:[" + t_start + "," + t_end + "),水位线:" + wm);

            ValueState<WindowResult> minState = context.windowState().getState(minStateDescriptor());

            WindowResult cachedMin = minState.value();
            System.out.println("窗口内缓存的最小状态:" + cachedMin);

            WindowResult event = elements.iterator().next();

            // Compare the minima, not the latest values: a firing whose latest value is small
            // can still carry a larger minimum than the one already cached for the window.
            if (cachedMin == null || event.getMinValue() <= cachedMin.getMinValue()) {
                minState.update(event);
                cachedMin = event;
            }

            // Attach the window bounds and the window-wide minimum.
            event.setWindowStart(windowStart);
            event.setWindowEnd(windowEnd);
            event.setMinValue(cachedMin.getMinValue());

            out.collect(event);
        }

        /**
         * Releases the per-window state when the window is purged, so cached minima do not
         * accumulate for windows that no longer exist.
         *
         * @param context context of the window being cleaned up
         * @throws Exception if state access fails
         */
        @Override
        public void clear(Context context) throws Exception {
            context.windowState().getState(minStateDescriptor()).clear();
        }

        /** Descriptor of the per-window state that caches the result carrying the smallest minimum. */
        private static ValueStateDescriptor<WindowResult> minStateDescriptor() {
            return new ValueStateDescriptor<WindowResult>("min", WindowResult.class);
        }
    }


    /**
     * One parsed input record. Accessors, constructors, equals/hashCode and toString are
     * generated by the Lombok annotations below.
     */
    @Data
    @NoArgsConstructor
    @AllArgsConstructor
    public static class Event {
        // Grouping key; main() keys the stream by this field name ("kind").
        private String kind;
        // Event time as text; main() converts it to epoch milliseconds via DateUtil.getMillsFromString.
        private String time;
        // Numeric reading carried by the record.
        private int value;
    }

    /**
     * Result of one window firing for one key. Getters, setters and constructors are
     * generated by Lombok; {@code equals}/{@code hashCode} are hand-written and leave
     * {@code useCnt} out.
     */
    @Getter
    @Setter
    @NoArgsConstructor
    @AllArgsConstructor
    public static class WindowResult {

        private String kind;
        private int value;// most recent value of the current firing
        private int maxValue;// maximum of the current firing
        private int minValue;// minimum of the current firing
        private long windowStart;// start of the window this firing belongs to
        private long windowEnd;// end of the window this firing belongs to
        private long useCnt;// computed difference; not part of equals/hashCode

        /**
         * Renders all fields, with the window bounds formatted as date strings.
         *
         * @return a human-readable representation
         */
        @Override
        public String toString() {
            return "WindowResult{" +
                    "kind='" + kind + '\'' +
                    ", value=" + value +
                    ", maxValue=" + maxValue +
                    ", minValue=" + minValue +
                    ", useCnt=" + useCnt +
                    ", windowStart=" + DateUtil.getDateStrFromMill(windowStart) +
                    ", windowEnd=" + DateUtil.getDateStrFromMill(windowEnd) +
                    '}';
        }

        /**
         * Compares every field except {@code useCnt}. The comparison is null-safe for
         * {@code kind}, so instances that have no key yet (for example a fresh accumulator)
         * can be compared without a NullPointerException.
         *
         * @param o the object to compare with
         * @return {@code true} if both objects describe the same firing
         */
        @Override
        public boolean equals(Object o) {
            if (this == o) {
                return true;
            }
            if (o == null || getClass() != o.getClass()) {
                return false;
            }
            WindowResult that = (WindowResult) o;
            return value == that.value
                    && maxValue == that.maxValue
                    && minValue == that.minValue
                    && windowStart == that.windowStart
                    && windowEnd == that.windowEnd
                    && Objects.equals(kind, that.kind);
        }

        /**
         * Hashes the same fields that {@link #equals(Object)} compares.
         *
         * @return the hash code
         */
        @Override
        public int hashCode() {
            return Objects.hash(kind, value, maxValue, minValue, windowStart, windowEnd);
        }
    }
}

 

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值