flink的窗口状态的计算

结论:Flink 的窗口计算是有状态的计算,窗口状态由 Flink 自动维护,并在开启 checkpoint 后持久化到外部存储(本例中为 HDFS)

以下是验证过程


import com.mz.iot.utils.DateUtil;
import org.apache.flink.api.common.functions.AggregateFunction;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.runtime.state.filesystem.FsStateBackend;
import org.apache.flink.streaming.api.CheckpointingMode;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

/**
 * 采用奇数偶数求和的案例验证window的state是否自动维护
 */
public class TestWindowState {


    /**
     * Builds and runs the odd/even windowed-sum job used to check whether window
     * state is maintained automatically and restored from a checkpoint.
     *
     * <p>Pipeline: socket text lines of the form {@code value,yyyy-MM-dd HH:mm:ss}
     * are parsed into {@code (parityKey, value, timestamp)} tuples, malformed lines
     * are dropped, event-time timestamps are assigned, the stream is keyed by
     * parity and summed per 15-minute tumbling event-time window.
     *
     * <p>Sending the single line {@code x} deliberately crashes the job so that
     * recovery from the retained checkpoint can be observed.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job fails while executing
     */
    public static void main(String[] args) throws Exception {

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        env.setParallelism(1);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        // Checkpoint every 1000 ms with exactly-once semantics.
        env.enableCheckpointing(1000, CheckpointingMode.EXACTLY_ONCE);
        env.getCheckpointConfig().setCheckpointTimeout(60000L);
        // NOTE(review): Flink's checkpointing documentation says concurrent checkpoints
        // cannot be combined with a minimum pause between checkpoints - confirm which
        // of the next two settings is actually intended.
        env.getCheckpointConfig().setMaxConcurrentCheckpoints(2);
        env.getCheckpointConfig().setMinPauseBetweenCheckpoints(100L);
        env.getCheckpointConfig().setPreferCheckpointForRecovery(true);
        env.getCheckpointConfig().setTolerableCheckpointFailureNumber(1);
        // Keep the checkpoint files after cancellation so the job can be resubmitted from them.
        env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);

        env.setStateBackend(new FsStateBackend("hdfs://mz-hadoop-01:8020/ck"));

        // Zero restart attempts: the first failure terminates the job, which is then
        // resubmitted by hand from the last checkpoint. The record that caused the crash
        // is not seen again after the restart, as if it had never been entered.
        env.setRestartStrategy(RestartStrategies.fixedDelayRestart(0, 3000L));

        /*
         * Input data format:
         * 5,2020-10-01 00:12:08
         * 5,2020-10-01 00:13:08
         * 5,2020-10-01 00:14:08
         *
         * 5,2020-10-01 00:15:08
         * 5,2020-10-01 00:16:08
         * 5,2020-10-01 00:17:08
         *
         * 5,2020-10-01 00:30:08
         */
        DataStreamSource<String> socket = env.socketTextStream("192.168.0.162", 7777);

        socket.print("socket stream========>");

        SingleOutputStreamOperator<Tuple3<String, Integer, String>> mapStream = socket.map(new MapFunction<String, Tuple3<String, Integer, String>>() {
            @Override
            public Tuple3<String, Integer, String> map(String value) {

                // Deliberate crash trigger: the division by zero throws ArithmeticException.
                if ("x".equals(value)) {
                    System.out.println("exit:" + (1 / 0));
                }

                String[] fields = value.split(",");

                if (fields.length == 2) {
                    try {
                        int v = Integer.parseInt(fields[0]);
                        // floorMod keeps negative odd numbers under key "1"; plain % would give "-1".
                        String k = String.valueOf(Math.floorMod(v, 2));
                        return Tuple3.of(k, v, fields[1]);
                    } catch (NumberFormatException ignored) {
                        // A non-numeric value is treated like any other malformed line:
                        // fall through to the sentinel tuple that the filter below drops.
                    }
                }

                // Sentinel for malformed input; removed by the filter before timestamps are extracted.
                return Tuple3.of("null", 0, "9999-99-99 00:00:00");
            }
        });

        mapStream.print("mapStream===>");

        SingleOutputStreamOperator<Tuple3<String, Integer, String>> filterStream = mapStream.filter(new FilterFunction<Tuple3<String, Integer, String>>() {
            @Override
            public boolean filter(Tuple3<String, Integer, String> value) throws Exception {
                return !"null".equals(value.f0);
            }
        }).assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor<Tuple3<String, Integer, String>>(Time.seconds(1)) {
            @Override
            public long extractTimestamp(Tuple3<String, Integer, String> element) {
                return DateUtil.getMillsFromString(element.f2);
            }
        });

        filterStream.print("filterStream====>");

        // Key by tuple field 0, the parity key produced by the map step.
        KeyedStream<Tuple3<String, Integer, String>, Tuple> keyedStream = filterStream
                .keyBy(0);

        keyedStream.print("keyed stream======>");

        SingleOutputStreamOperator<OddEven> windowResult = keyedStream
                .window(TumblingEventTimeWindows.of(Time.minutes(15), Time.minutes(0)))
                .aggregate(
                        new AggregateFunction<Tuple3<String, Integer, String>, OddEven, OddEven>() {

                            @Override
                            public OddEven createAccumulator() {
                                return new OddEven();
                            }

                            @Override
                            public OddEven add(Tuple3<String, Integer, String> value, OddEven accumulator) {
                                accumulator.setSum(accumulator.getSum() + value.f1);
                                return accumulator;
                            }

                            @Override
                            public OddEven getResult(OddEven accumulator) {
                                return accumulator;
                            }

                            @Override
                            public OddEven merge(OddEven a, OddEven b) {
                                // Combine both partial sums instead of returning null, so the
                                // function stays correct if accumulators are ever merged.
                                a.setSum(a.getSum() + b.getSum());
                                if (a.getT_start() == null) {
                                    a.setT_start(b.getT_start());
                                }
                                return a;
                            }
                        },
                        new ProcessWindowFunction<OddEven, OddEven, Tuple, TimeWindow>() {
                            @Override
                            public void process(Tuple tuple, Context context, Iterable<OddEven> elements, Collector<OddEven> out) throws Exception {

                                TimeWindow window = context.window();

                                String t_start = DateUtil.getDateStrFromMill(window.getStart());
                                String t_end = DateUtil.getDateStrFromMill(window.getEnd());

                                System.out.println("t_start:" + t_start + ",t_end:" + t_end);

                                // Only the first element is read: the pre-aggregated sum for this key and window.
                                OddEven e = elements.iterator().next();

                                e.setT_start(t_start);

                                out.collect(e);
                            }
                        });

        windowResult.print("window result====>");

        env.execute("test window state with socket stream");
    }
    /**
     * Mutable bean holding a running sum together with the formatted start time
     * of the window the sum belongs to.
     *
     * <p>Has a public no-argument constructor plus a getter and setter for each field.
     */
    public static class OddEven {

        /** Formatted window start time; {@code null} until set. */
        private String t_start;

        /** Accumulated sum; starts at 0. */
        private int sum;

        /** Creates an empty instance with no start time and a sum of 0. */
        public OddEven() {
        }

        /**
         * Creates a fully initialised instance.
         *
         * @param t_start formatted window start time
         * @param sum     initial sum
         */
        public OddEven(String t_start, int sum) {
            this.t_start = t_start;
            this.sum = sum;
        }

        /** @return the formatted window start time, or {@code null} if not set */
        public String getT_start() {
            return t_start;
        }

        /** @param t_start the formatted window start time to store */
        public void setT_start(String t_start) {
            this.t_start = t_start;
        }

        /** @return the current sum */
        public int getSum() {
            return sum;
        }

        /** @param sum the new sum */
        public void setSum(int sum) {
            this.sum = sum;
        }

        /** @return a text form such as {@code OddEven{t_start='...', sum=15}} */
        @Override
        public String toString() {
            StringBuilder text = new StringBuilder("OddEven{");
            text.append("t_start='").append(t_start).append('\'');
            text.append(", sum=").append(sum);
            text.append('}');
            return text.toString();
        }
    }
}

上面的程序是一个奇数偶数分别求和的案例

首先提交job

分别输入第一个一刻钟内的三条数据:

         * 5,2020-10-01 00:12:08
         * 5,2020-10-01 00:13:08
         * 5,2020-10-01 00:14:08

以及触发窗口计算的数据5,2020-10-01 00:15:08

从taskmanager中查看输出,没问题,该统计结果是15

我们接着输入x,使程序宕机

我们查看宕机时候的checkPoint点

 

重启程序,并使用上次的ck记录

查看taskmanager的log

接着我们输入如下数据

5,2020-10-01 00:16:08
5,2020-10-01 00:17:08
5,2020-10-01 00:30:08

重启后我们输入的数据之和只有10(00:16:08 和 00:17:08 两条),但是我们看到,Flink 计算出的 00:15–00:30 这个窗口的和为15,也就是说宕机前输入的那一条数据(5,2020-10-01 00:15:08)也参与了计算,说明窗口状态已经从 checkpoint 中恢复,验证完毕

  • 1
    点赞
  • 5
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值