Flink个人学习整理-Windows篇(五)
窗口分为两大类:
1、基于时间
2、基于元素个数
一、基于时间的窗口
1、滚动窗口 TumblingProcessingTimeWindows
/**
 * Demo: word count over 5-second tumbling processing-time windows.
 *
 * <p>Reads text lines from a socket on localhost:9999, splits each line on spaces, keys the
 * resulting (word, 1) pairs by word and, for every key and window, emits a tuple of
 * {@code "<window start>:<key>"} and the number of elements seen in that window.
 */
public class Flink_Time_TumblingWindow {

    /**
     * Builds and runs the streaming job.
     *
     * @param args unused
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        // Obtain the execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        env.socketTextStream("localhost",9999)
                .flatMap(new FlatMapFunction<String, Tuple2<String,Integer>>() {
                    @Override
                    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
                        // One (word, 1) pair per space-separated token.
                        String[] strings = value.split(" ");
                        for (String string : strings) {
                            out.collect(Tuple2.of(string,1));
                        }
                    }
                })
                .keyBy(new KeySelector<Tuple2<String, Integer>, String>() {
                    @Override
                    public String getKey(Tuple2<String, Integer> value) throws Exception {
                        return value.f0;
                    }
                })
                // 5-second tumbling window on processing time: the window is chosen from the
                // current wall-clock time.
                //   If the current time is second 51, the element belongs to [50, 55) and the result
                //   is emitted about 4s later.
                //   If the current time is second 54, the element belongs to [50, 55) and the result
                //   is emitted about 1s later.
                // 1) Offset (from the Flink Javadoc): such as China which is using UTC+08:00, and you
                //    want a time window with size of one day, and window begins at every 00:00:00 of
                //    local time, you may use {@code of(Time.days(1),Time.hours(-8))}.
                // 2) Math.abs(offset) >= size is not allowed: windows are aligned relative to
                //    1970-01-01 00:00:00.000 and the offset may not reach a full window size.
                // 3) Assignment rule:
                //      long start = TimeWindow.getWindowStartWithOffset(now, (globalOffset + staggerOffset) % size, size);
                //      return Collections.singletonList(new TimeWindow(start, start + size));
                //    Example: now = 54s, offset = 1s, size = 5s
                //      54 - (54 - 1 + 5) % 5 = 51, so the window is [51, 56]
                //      public static long getWindowStartWithOffset(long timestamp, long offset, long windowSize) {
                //          return timestamp - (timestamp - offset + windowSize) % windowSize;
                //      }
                //    and because maxTimestamp() is end - 1, the window is effectively [51, 56):
                //      public long maxTimestamp() {
                //          return end - 1;
                //      }
                .window(TumblingProcessingTimeWindows.of(Time.seconds(5)))
                // Alternative 1: simple built-in aggregation.
                // .sum(1)
                // Alternative 2: incremental aggregation + full-window function for window metadata.
                // .aggregate(new MyAggregateFunction(),new MyWindowFunction())
                // Alternative 3: full-window function via apply; needed when all data must be
                // collected first (sorting, maximum, top 20%, ...).
                // .apply(new WindowFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String, TimeWindow>() {
                //     @Override
                //     public void apply(String key, TimeWindow window, Iterable<Tuple2<String, Integer>> input, Collector<Tuple2<String, Integer>> out) throws Exception {
                //         // Size of the iterable.
                //         ArrayList<Tuple2<String, Integer>> tuple2s = Lists.newArrayList(input.iterator());
                //         // Emit; a full-window function has access to the window metadata.
                //         out.collect(Tuple2.of(new Timestamp(window.getStart())+":"+ key,tuple2s.size()));
                //     }
                // })
                // Full-window function via process.
                .process(new ProcessWindowFunction<Tuple2<String, Integer>, Tuple2<String, Integer>, String, TimeWindow>() {
                    @Override
                    public void process(String key, Context context, Iterable<Tuple2<String, Integer>> elements, Collector<Tuple2<String, Integer>> out) throws Exception {
                        // Count the elements directly instead of copying them into a list first.
                        int count = 0;
                        for (Tuple2<String, Integer> ignored : elements) {
                            count++;
                        }
                        // Include the key so results of different words can be told apart,
                        // matching the format produced by MyWindowFunction.
                        out.collect(Tuple2.of(new Timestamp(context.window().getStart()) + ":" + key, count));
                    }
                })
                .print("TumblingWindow 滚动窗口");

        env.execute();
    }

    /**
     * Window function whose input is the output of the incremental aggregation; it prefixes the
     * key with the window start time.
     */
    public static class MyWindowFunction implements WindowFunction<Integer,Tuple2<String,Integer>,String,TimeWindow>{
        @Override
        public void apply(String key, TimeWindow window, Iterable<Integer> input, Collector<Tuple2<String, Integer>> out) throws Exception {
            // Take the first value from the iterable (the pre-aggregated count).
            Integer next = input.iterator().next();
            // Emit "<window start>:<key>" together with the count.
            out.collect(Tuple2.of(new Timestamp(window.getStart())+":"+ key,next));
        }
    }

    /** Incremental aggregate that counts elements; the accumulator is the running count. */
    public static class MyAggregateFunction implements AggregateFunction<Tuple2<String,Integer>,Integer,Integer>{
        /** Creates the initial accumulator. */
        @Override
        public Integer createAccumulator() {
            return 0;
        }

        /** Adds one to the count for every incoming element. */
        @Override
        public Integer add(Tuple2<String, Integer> value, Integer accumulator) {
            return accumulator + 1;
        }

        /** Returns the accumulated count as the result. */
        @Override
        public Integer getResult(Integer accumulator) {
            return accumulator;
        }

        /**
         * Combines two partial counts. Used when windows are merged (e.g. session windows), so it
         * must return the combined count rather than {@code null}.
         */
        @Override
        public Integer merge(Integer a, Integer b) {
            return a + b;
        }
    }
}
2、滑动窗口 SlidingProcessingTimeWindows
/**
 * Demo: per-token sums over sliding processing-time windows (size 5s, slide 2s).
 *
 * <p>Reads lines from a socket on localhost:9999 and splits them on commas.
 */
public class Flink_Time_SlidinigWindows {

    public static void main(String[] args) throws Exception {
        // Obtain the execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();

        // Turns one comma-separated line into (token, 1) pairs.
        FlatMapFunction<String, Tuple2<String, Integer>> toPairs =
                new FlatMapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
                        String[] tokens = value.split(",");
                        for (int i = 0; i < tokens.length; i++) {
                            out.collect(Tuple2.of(tokens[i], 1));
                        }
                    }
                };

        // Groups the pairs by their token.
        KeySelector<Tuple2<String, Integer>, String> byToken =
                new KeySelector<Tuple2<String, Integer>, String>() {
                    @Override
                    public String getKey(Tuple2<String, Integer> value) throws Exception {
                        return value.f0;
                    }
                };

        env.socketTextStream("localhost", 9999)
                .flatMap(toPairs)
                .keyBy(byToken)
                .window(SlidingProcessingTimeWindows.of(Time.seconds(5), Time.seconds(2)))
                .sum(1)
                .print("SlidingWindow滑动窗口");

        env.execute();
    }
}
3、会话窗口 ProcessingTimeSessionWindows
/**
 * Demo: per-word sums over processing-time session windows with a 5-second gap.
 *
 * <p>Reads lines from a socket on localhost:9999 and splits them on spaces.
 */
public class Flink_Time_SessionWindows {

    public static void main(String[] args) throws Exception {
        // Obtain the execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Flattens a line into (word, 1) tuples.
        FlatMapFunction<String, Tuple2<String, Integer>> lineToPairs =
                new FlatMapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
                        String[] words = value.split(" ");
                        for (int i = 0; i < words.length; i++) {
                            out.collect(Tuple2.of(words[i], 1));
                        }
                    }
                };

        // Groups by the word stored in the first tuple field.
        KeySelector<Tuple2<String, Integer>, String> byWord =
                new KeySelector<Tuple2<String, Integer>, String>() {
                    @Override
                    public String getKey(Tuple2<String, Integer> value) throws Exception {
                        return value.f0;
                    }
                };

        // Read from the socket, window by session gap, sum the counts and print.
        env.socketTextStream("localhost", 9999)
                .flatMap(lineToPairs)
                .keyBy(byWord)
                .window(ProcessingTimeSessionWindows.withGap(Time.seconds(5)))
                .sum(1)
                .print("SessionWindows会话窗口");

        env.execute();
    }
}
会话窗口(事件时间) EventTimeSessionWindows
/**
 * Demo: sums the "vc" field per sensor id over event-time session windows with a 5-second gap.
 *
 * <p>Reads comma-separated lines (id, ts, vc) from a socket on localhost:9999. Timestamps are
 * taken from {@code getTs() * 1000L} and watermarks tolerate 2 seconds of out-of-orderness.
 */
public class Flink_Time_EventSessionWindows {

    public static void main(String[] args) throws Exception {
        // Obtain the execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Parses "id,ts,vc" into a Sensor.
        MapFunction<String, Sensor> parseSensor = new MapFunction<String, Sensor>() {
            @Override
            public Sensor map(String value) throws Exception {
                String[] fields = value.split(",");
                String id = fields[0];
                long ts = Long.parseLong(fields[1]);
                int vc = Integer.parseInt(fields[2]);
                return new Sensor(id, ts, vc);
            }
        };

        // Bounded out-of-orderness of 2s; the event timestamp is the sensor's ts scaled by 1000.
        WatermarkStrategy<Sensor> watermarks = WatermarkStrategy
                .<Sensor>forBoundedOutOfOrderness(Duration.ofSeconds(2))
                .withTimestampAssigner(new SerializableTimestampAssigner<Sensor>() {
                    @Override
                    public long extractTimestamp(Sensor element, long recordTimestamp) {
                        return element.getTs() * 1000L;
                    }
                });

        // Groups records by sensor id.
        KeySelector<Sensor, String> bySensorId = new KeySelector<Sensor, String>() {
            @Override
            public String getKey(Sensor value) throws Exception {
                return value.getId();
            }
        };

        env.socketTextStream("localhost", 9999)
                .map(parseSensor)
                .assignTimestampsAndWatermarks(watermarks)
                .keyBy(bySensorId)
                // Session: the watermark (event time minus the allowed delay) is compared with the
                // element's own timestamp; the gap condition is a difference of at least 5s.
                .window(EventTimeSessionWindows.withGap(Time.seconds(5)))
                .sum("vc")
                .print("Event_SessionWindows");

        env.execute();
    }
}
二、基于元素个数的窗口
1、Count_TumblingWindows滚动窗口
/**
 * Demo: per-word sums over tumbling count windows of 5 elements.
 *
 * <p>Reads lines from a socket on localhost:9999 and splits them on spaces.
 */
public class Flink_Count_TumblingWindow {

    public static void main(String[] args) throws Exception {
        // Obtain the execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Emits a (word, 1) pair for every space-separated token of a line.
        FlatMapFunction<String, Tuple2<String, Integer>> tokenize =
                new FlatMapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
                        String[] words = value.split(" ");
                        for (int i = 0; i < words.length; i++) {
                            out.collect(Tuple2.of(words[i], 1));
                        }
                    }
                };

        // Keys each pair by its word.
        KeySelector<Tuple2<String, Integer>, String> wordKey =
                new KeySelector<Tuple2<String, Integer>, String>() {
                    @Override
                    public String getKey(Tuple2<String, Integer> value) throws Exception {
                        return value.f0;
                    }
                };

        env.socketTextStream("localhost", 9999)
                .flatMap(tokenize)
                .keyBy(wordKey)
                .countWindow(5L)
                .sum(1)
                .print("TumblingCountWindow计数滚动窗口");

        env.execute();
    }
}
2、Count_SlidingWindows滑动窗口
/**
 * Demo: per-word sums over sliding count windows (size 5 elements, slide 2 elements).
 *
 * <p>Reads lines from a socket on localhost:9999 and splits them on spaces.
 */
public class Flink_Count_SlidingWindows {

    public static void main(String[] args) throws Exception {
        // Obtain the execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Splits a line on spaces and emits (word, 1) for each piece.
        FlatMapFunction<String, Tuple2<String, Integer>> splitWords =
                new FlatMapFunction<String, Tuple2<String, Integer>>() {
                    @Override
                    public void flatMap(String value, Collector<Tuple2<String, Integer>> out) throws Exception {
                        String[] pieces = value.split(" ");
                        for (int i = 0; i < pieces.length; i++) {
                            out.collect(Tuple2.of(pieces[i], 1));
                        }
                    }
                };

        // Uses the word (first tuple field) as the key.
        KeySelector<Tuple2<String, Integer>, String> keyOfPair =
                new KeySelector<Tuple2<String, Integer>, String>() {
                    @Override
                    public String getKey(Tuple2<String, Integer> value) throws Exception {
                        return value.f0;
                    }
                };

        env.socketTextStream("localhost", 9999)
                .flatMap(splitWords)
                .keyBy(keyOfPair)
                // Slide step is 2: the window is evaluated once every 2 elements.
                .countWindow(5L, 2L)
                .sum(1)
                .print("CountSlidingWindow计数滑动窗口");

        env.execute();
    }
}