1、数据
0001,1538359890000
0001,1538359891000
0001,1538359892000
0001,1538359893000
0001,1538359894000
0001,1538359895000
0001,1538359896000
0001,1538359897000
0001,1538359898000
0001,1538359899000
0001,1538359900000
0001,1538359901000
0001,1538359902000
0001,1538359903000
2、程序
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.tuple.Tuple;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.functions.windowing.WindowFunction;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
import javax.annotation.Nullable;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
/**
 * Demonstrates event-time tumbling windows driven by periodic watermarks.
 *
 * <p>The job reads text lines of the form {@code key,timestampMillis} from a socket on
 * {@code localhost:9999}, uses the second field as the event timestamp, emits watermarks that
 * trail the largest timestamp seen so far by 3000 ms, groups records by key into 3-second
 * tumbling event-time windows, and prints one summary line per fired window.
 *
 * <p>Sample input sets:
 * <pre>
 * 0001,1538359882000
 * 0001,1538359886000
 * 0001,1538359892000
 * 0001,1538359893000
 * 0001,1538359894000
 * 0001,1538359896000
 * 0001,1538359897000
 * 0001,1538359898000
 * 0001,1538359899000
 * 0001,1538359899000
 * 0001,1538359900000
 * 0001,1538359910000
 *
 *
 * 0001,1538359890000
 * 0001,1538359903000
 * 0001,1538359908000
 * 0001,1538359909000
 * 0001,1538359910000
 * 0001,1538359920000
 * 0001,1538359930000
 * </pre>
 */
public class StreamingWindowWaterMark {

    /** Pattern used for every human-readable timestamp this job prints. */
    private static final String TIMESTAMP_PATTERN = "yyyy-MM-dd HH:mm:ss.SSS";

    /**
     * Builds and runs the streaming job.
     *
     * @param args command-line arguments (unused)
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
        env.setParallelism(2);

        DataStream<String> socketStream = env.socketTextStream("localhost", 9999, "\n");

        // Parse "key,timestampMillis" into a (key, timestamp) pair.
        DataStream<Tuple2<String, Long>> inputMap = socketStream.map(new MapFunction<String, Tuple2<String, Long>>() {
            @Override
            public Tuple2<String, Long> map(String s) throws Exception {
                final String[] arr = s.split(",");
                if (arr.length < 2) {
                    throw new IllegalArgumentException("Expected 'key,timestampMillis' but got: " + s);
                }
                // Trim the numeric field so a trailing '\r' (CRLF clients) or stray spaces
                // do not make Long.parseLong fail; the key is passed through unchanged.
                return new Tuple2<String, Long>(arr[0], Long.parseLong(arr[1].trim()));
            }
        });

        final SingleOutputStreamOperator<Tuple2<String, Long>> waterMarkStream = inputMap.assignTimestampsAndWatermarks(new AssignerWithPeriodicWatermarks<Tuple2<String, Long>>() {
            /** Largest event timestamp observed so far by this assigner instance. */
            private long currentMaxTimestamp = 0L;
            /** How far (in ms) the watermark trails the largest observed timestamp. */
            private final long maxOutOfOrderness = 3000L;
            private final SimpleDateFormat sdf = new SimpleDateFormat(TIMESTAMP_PATTERN);

            @Nullable
            @Override
            public Watermark getCurrentWatermark() {
                return new Watermark(currentMaxTimestamp - maxOutOfOrderness);
            }

            @Override
            public long extractTimestamp(Tuple2<String, Long> element, long l) {
                final long timestamp = element.f1;
                currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp);
                // Computed once, after the max was updated, purely for the trace line below.
                final long watermark = getCurrentWatermark().getTimestamp();
                final long id = Thread.currentThread().getId();
                System.out.println("threadId : " + id + " key:" + element.f0 + ",eventtime:[" + element.f1 + "|" + sdf.format(timestamp) + "], currentMaxTimestamp:[" + currentMaxTimestamp + "|" + sdf.format(currentMaxTimestamp) + "],watermark:[" + watermark + "|" + sdf.format(watermark) + "]");
                return timestamp;
            }
        });

        final SingleOutputStreamOperator<String> windowStream = waterMarkStream.keyBy(0)
                // Window start = timestamp - (timestamp - offset + windowSize) % windowSize
                .window(TumblingEventTimeWindows.of(Time.seconds(3)))
                .apply(new WindowFunction<Tuple2<String, Long>, String, Tuple, TimeWindow>() {
                    /**
                     * Emits "key,count,minEventTime,maxEventTime,windowStart,windowEnd" for the
                     * elements handed to this window invocation.
                     */
                    @Override
                    public void apply(Tuple tuple, TimeWindow timeWindow, Iterable<Tuple2<String, Long>> iterable, Collector<String> collector) throws Exception {
                        final String key = tuple.toString();
                        final ArrayList<Long> timestamps = new ArrayList<Long>();
                        for (Tuple2<String, Long> element : iterable) {
                            timestamps.add(element.f1);
                        }
                        // Sort so the first/last entries are the earliest/latest event times.
                        Collections.sort(timestamps);
                        SimpleDateFormat sdf = new SimpleDateFormat(TIMESTAMP_PATTERN);
                        String result = key + "," + timestamps.size() + "," + sdf.format(timestamps.get(0)) + "," + sdf.format(timestamps.get(timestamps.size() - 1)) + "," + sdf.format(timeWindow.getStart()) + "," + sdf.format(timeWindow.getEnd());
                        collector.collect(result);
                    }
                });

        windowStream.print();
        env.execute("waterMark-time");
    }
}
3、结果
3.1、并行度为1时
可以看出并行度为1的时候
窗口开始时间等于 timestamp - (timestamp - offset + windowSize) % windowSize
窗口触发条件为 waterMark >= window end time