Flink 多并行数据源下的 Watermark 触发机制

1、数据

0001,1538359890000
0001,1538359891000
0001,1538359892000
0001,1538359893000
0001,1538359894000
0001,1538359895000
0001,1538359896000
0001,1538359897000
0001,1538359898000
0001,1538359899000
0001,1538359900000
0001,1538359901000
0001,1538359902000
0001,1538359903000

2、程序
import org.apache.flink.api.common.functions.MapFunction;

import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;

import javax.annotation.Nullable;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;

/**
 * Demo job for experimenting with when event-time tumbling windows fire under
 * different parallelism settings.
 *
 * <p>The job reads {@code key,eventTimeMillis} lines from a socket on
 * {@code localhost:9999}, assigns each record its event timestamp, emits a
 * watermark of {@code maxSeenTimestamp - 3000 ms}, groups records by key into
 * 3-second tumbling event-time windows and prints one summary line per fired
 * window. Every record is also logged together with the id of the thread that
 * processed it and the watermark value at that moment.
 *
 * <p>Two sample input sequences:
 * <pre>
 * 0001,1538359882000
 * 0001,1538359886000
 * 0001,1538359892000
 * 0001,1538359893000
 * 0001,1538359894000
 * 0001,1538359896000
 * 0001,1538359897000
 * 0001,1538359898000
 * 0001,1538359899000
 * 0001,1538359899000
 * 0001,1538359900000
 * 0001,1538359910000
 *
 *
 * 0001,1538359890000
 * 0001,1538359903000
 * 0001,1538359908000
 * 0001,1538359909000
 * 0001,1538359910000
 * 0001,1538359920000
 * 0001,1538359930000
 * </pre>
 */
public class StreamingWindowWaterMark {

    /**
     * Builds and runs the streaming job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if the job fails to execute
     */
    public static void main(String[] args) throws Exception {
        final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.setParallelism(2);

        final DataStream<String> socketStream = env.socketTextStream("localhost", 9999, "\n");

        // Parse each "key,eventTimeMillis" line into a (key, timestamp) pair.
        final DataStream<Tuple2<String, Long>> inputMap = socketStream.map(new MapFunction<String, Tuple2<String, Long>>() {
            @Override
            public Tuple2<String, Long> map(String s) throws Exception {
                final String[] arr = s.split(",");
                if (arr.length < 2) {
                    // Fail with the offending line instead of a bare index error.
                    throw new IllegalArgumentException("Expected 'key,eventTimeMillis' but got: " + s);
                }
                return new Tuple2<String, Long>(arr[0], Long.parseLong(arr[1].trim()));
            }
        });

        final SingleOutputStreamOperator<Tuple2<String, Long>> waterMarkStream = inputMap.assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<Tuple2<String, Long>>() {

            // Primitive state: avoids unboxing/re-boxing on every record.
            private long currentMaxTimestamp = 0L;
            // Allowed lateness of out-of-order events, in milliseconds.
            private final long maxOutOfOrderness = 3000L;
            private final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");

            /** Returns the largest timestamp seen so far minus the out-of-orderness bound. */
            @Nullable
            @Override
            public Watermark getCurrentWatermark() {
                return new Watermark(currentMaxTimestamp - maxOutOfOrderness);
            }

            /**
             * Uses the tuple's second field as the event timestamp, advances the
             * running maximum and logs the record with the current watermark.
             */
            @Override
            public long extractTimestamp(Tuple2<String, Long> element, long l) {
                final long timestamp = element.f1;
                currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp);
                // Compute the watermark once; it is only needed for the log line here.
                final long watermark = getCurrentWatermark().getTimestamp();
                final long id = Thread.currentThread().getId();
                System.out.println("threadId : " + id
                        + " key:" + element.f0
                        + ",eventtime:[" + timestamp + "|" + sdf.format(timestamp)
                        + "], currentMaxTimestamp:[" + currentMaxTimestamp + "|" + sdf.format(currentMaxTimestamp)
                        + "],watermark:[" + watermark + "|" + sdf.format(watermark) + "]");
                return timestamp;
            }
        });

        final SingleOutputStreamOperator<String> windowStream = waterMarkStream.keyBy(0)
                // Window start = timestamp - (timestamp - offset + windowSize) % windowSize
                .window(TumblingEventTimeWindows.of(Time.seconds(3)))
                .apply(new WindowFunction<Tuple2<String, Long>, String, Tuple, TimeWindow>() {
                    /**
                     * Emits "key,count,minEventTime,maxEventTime,windowStart,windowEnd"
                     * for the elements of one fired window.
                     */
                    @Override
                    public void apply(Tuple tuple, TimeWindow timeWindow, Iterable<Tuple2<String, Long>> iterable, Collector<String> collector) throws Exception {
                        final String key = tuple.toString();
                        final ArrayList<Long> timestamps = new ArrayList<Long>();
                        for (Tuple2<String, Long> element : iterable) {
                            timestamps.add(element.f1);
                        }
                        // Sort so the first/last entries are the earliest/latest event times.
                        Collections.sort(timestamps);
                        final SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss.SSS");
                        final String result = key
                                + "," + timestamps.size()
                                + "," + sdf.format(timestamps.get(0))
                                + "," + sdf.format(timestamps.get(timestamps.size() - 1))
                                + "," + sdf.format(timeWindow.getStart())
                                + "," + sdf.format(timeWindow.getEnd());
                        collector.collect(result);
                    }
                });

        windowStream.print();

        env.execute("waterMark-time");
    }
}
3、结果
3.1、并行度为1时

在这里插入图片描述

可以看出并行度为1的时候

窗口开始时间等于 timestamp - (timestamp - offset + windowSize) % windowSize

窗口触发条件为 waterMark >= window end time

3.2、并行度为2时

在这里插入图片描述

3.3、并行度为3时

在这里插入图片描述

3.4、并行度为4时

在这里插入图片描述

3.5、并行度为8时

在这里插入图片描述

3.6、并行为2 窗口时间为5 延时时间为3

在这里插入图片描述

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值