/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
* regarding copyright ownership. The ASF licenses this file
* to you under the Apache License, Version 2.0 (the
* "License"); you may not use this file except in compliance
* with the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.flink.api.common.eventtime;
import org.apache.flink.annotation.Public;
import java.time.Duration;
import static org.apache.flink.util.Preconditions.checkArgument;
import static org.apache.flink.util.Preconditions.checkNotNull;
/**
* A WatermarkGenerator for situations where records are out of order, but you can place an upper
* bound on how far the events are out of order. An out-of-order bound B means that once an event
* with timestamp T was encountered, no events older than {@code T - B} will follow any more.
*
* <p>The watermarks are generated periodically. The delay introduced by this watermark strategy is
* the periodic interval length, plus the out-of-orderness bound.
*
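* <p>Note on this local copy: when the JVM loads classes it follows the parent-delegation
* model. In plain terms: if two classes with the same name are available to the JVM, the
* class in your own source directory is the one that gets loaded first.
*
* <p>This file is such a locally modified copy ("secondary development") of the Flink class.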
*/
@Public
public class BoundedOutOfOrdernessWatermarks<T> implements WatermarkGenerator<T> {

    /** Out-of-orderness bound assumed by this generator, in milliseconds. */
    private final long outOfOrdernessMillis;

    /** Largest event timestamp observed up to now. */
    private long maxTimestamp;

    /**
     * Builds a watermark generator that tolerates the given amount of out-of-orderness.
     *
     * @param maxOutOfOrderness The bound for the out-of-orderness of the event timestamps;
     *     checked to be non-null and non-negative.
     */
    public BoundedOutOfOrdernessWatermarks(Duration maxOutOfOrderness) {
        checkNotNull(maxOutOfOrderness, "maxOutOfOrderness");
        checkArgument(!maxOutOfOrderness.isNegative(), "maxOutOfOrderness cannot be negative");

        final long boundMillis = maxOutOfOrderness.toMillis();
        this.outOfOrdernessMillis = boundMillis;

        // Seed the maximum so that a watermark computed before any event arrives
        // evaluates to exactly Long.MIN_VALUE.
        this.maxTimestamp = Long.MIN_VALUE + boundMillis + 1;
    }

    // ------------------------------------------------------------------------

    /**
     * Records the event's timestamp if it is the largest seen so far. Nothing is emitted here;
     * watermarks are only produced from {@link #onPeriodicEmit(WatermarkOutput)}.
     */
    @Override
    public void onEvent(T event, long eventTimestamp, WatermarkOutput output) {
        if (eventTimestamp > maxTimestamp) {
            maxTimestamp = eventTimestamp;
        }
    }

    /**
     * Emits a watermark of {@code maxTimestamp - outOfOrdernessMillis - 1} and prints a trace
     * line with the current thread name and the emitted timestamp to standard output.
     */
    @Override
    public void onPeriodicEmit(WatermarkOutput output) {
        final long watermarkMillis = maxTimestamp - outOfOrdernessMillis - 1;
        final Watermark emitted = new Watermark(watermarkMillis);
        output.emitWatermark(emitted);

        // Local debugging aid: shows which thread emitted which watermark.
        final String threadName = Thread.currentThread().getName();
        System.out.println(threadName + "向下游发送了WM:" + emitted.getTimestamp());
    }
}
package com.atguigu.flink.utils;
import com.atguigu.flink.pojo.WaterSensor;
import java.util.ArrayList;
import java.util.List;
/**
* Created by Smexy on 2023/6/20
*/
public class MyUtil
{
    /**
     * Copies all elements of an {@link Iterable} (for example the contents of a window)
     * into a new list, preserving iteration order.
     *
     * <p>The parameter is declared as {@code Iterable<? extends T>} so that an iterable of a
     * subtype can be collected into a list of a supertype, e.g. an {@code Iterable<Integer>}
     * into a {@code List<Number>}.
     *
     * @param iterable the elements to copy; must not be {@code null}
     * @param <T> element type of the returned list
     * @return a new, mutable {@link ArrayList} holding the elements in iteration order
     * @throws NullPointerException if {@code iterable} is {@code null}
     */
    public static <T> List<T> parseToList(Iterable<? extends T> iterable){
        List<T> result = new ArrayList<>();
        for (T element : iterable) {
            result.add(element);
        }
        return result;
    }
}
package com.atguigu.flink.pojo;
import lombok.AllArgsConstructor;
import lombok.Data;
import lombok.NoArgsConstructor;
@Data
@NoArgsConstructor
@AllArgsConstructor
public class WaterSensor {

    /** Sensor identifier. */
    private String id;

    /** Timestamp of the reading. */
    private Long ts;

    /** Numeric reading; presumably the water level, TODO confirm. */
    private Integer vc;

    /**
     * Renders the sensor as {@code ws(id,ts,vc)}; {@code null} fields are printed as
     * {@code null}.
     */
    @Override
    public String toString() {
        StringBuilder text = new StringBuilder();
        text.append("ws(").append(id);
        text.append(",").append(ts);
        text.append(",").append(vc);
        text.append(")");
        return text.toString();
    }
}
package com.atguigu.flink.func;
import com.atguigu.flink.pojo.WaterSensor;
import org.apache.flink.api.common.functions.MapFunction;
/**
* Created by Smexy on 2023/4/4
*/
public class WaterSensorMapFunction implements MapFunction<String, WaterSensor>
{
    /**
     * Parses one comma-separated line of the form {@code id,ts,vc} into a {@link WaterSensor}.
     *
     * <p>The id is passed through unchanged. The two numeric fields are trimmed before
     * parsing so that input such as {@code "s1, 1000, 3"} is accepted. Fields beyond the
     * third are ignored.
     *
     * @param value the raw input line; must not be {@code null}
     * @return the parsed sensor reading
     * @throws IllegalArgumentException if the line has fewer than three comma-separated fields
     * @throws NumberFormatException if the second field is not a valid {@code long} or the
     *     third field is not a valid {@code int}
     */
    @Override
    public WaterSensor map(String value) throws Exception {
        String[] fields = value.split(",");
        // Fail with a message that names the offending line instead of a bare
        // ArrayIndexOutOfBoundsException.
        if (fields.length < 3) {
            throw new IllegalArgumentException(
                    "Expected at least 3 comma-separated fields (id,ts,vc) but got "
                            + fields.length + ": '" + value + "'");
        }
        return new WaterSensor(
                fields[0],
                Long.valueOf(fields[1].trim()),
                Integer.valueOf(fields[2].trim())
        );
    }
}
package com.atguigu.flink.timeAndwindow;
import com.atguigu.flink.func.WaterSensorMapFunction;
import com.atguigu.flink.pojo.WaterSensor;
import com.atguigu.flink.utils.MyUtil;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.ProcessAllWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
import java.time.Duration;
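/**
* Created by Smexy on 2023/6/20
* Question: if the watermark of one parallel subtask does not advance, and as a result the
* watermark of the whole downstream cannot advance, how do we handle it?
*
* How to inspect watermarks?
* Web UI: the current operator's own watermark is not shown there; what you see is the
* watermark received by the downstream operator.
*
* ------------------------------
* There are 6 map tasks and only 1 of them receives data, yet all 6 have to emit watermarks.
* The solution is to control each task's eligibility to emit watermarks:
* if a task receives no data for a long time, so that its watermark cannot be updated or
* advanced, that task's eligibility to emit watermarks is revoked.
*
*
* Idle: not busy, out of work.
*
*/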
public class Flink13_MultiParilismWaterMark
{
    /**
     * Builds and runs the demo job: reads text lines from a socket, sends them all to one
     * subtask, assigns event-time timestamps and watermarks (with idleness detection),
     * groups records into 5000 ms tumbling event-time windows and prints each window's
     * contents.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setInteger("rest.port", 3333);

        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(conf);
        // Change the default watermark emission interval.
        env.getConfig().setAutoWatermarkInterval(2000);
        // Simulate multiple parallel subtasks.
        env.setParallelism(6);

        // Declare the watermark strategy.
        WatermarkStrategy<WaterSensor> watermarkStrategy = idleAwareMonotonousStrategy();

        env
            .socketTextStream("hadoop102", 8888)
            // Global partitioning: every record goes to the first downstream task.
            .global()
            .map(new WaterSensorMapFunction())
            .assignTimestampsAndWatermarks(watermarkStrategy)
            /*
                Which window a record falls into has nothing to do with the watermark;
                it depends only on the record's event time and the window's range:
                    [0, 5000)
                    [5000, 10000)
             */
            .windowAll(TumblingEventTimeWindows.of(Time.milliseconds(5000)))
            .process(new WindowContentsFunction())
            .print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Creates the watermark strategy used by the job.
     *
     * @return a strategy for monotonously increasing timestamps that reads the event time
     *     from {@code WaterSensor.getTs()} and is configured with a 10 second idleness timeout
     */
    private static WatermarkStrategy<WaterSensor> idleAwareMonotonousStrategy() {
        return WatermarkStrategy
            // Part 1: the watermark characteristic (in-order vs. out-of-order timestamps).
            .<WaterSensor>forMonotonousTimestamps()
            // Part 2: how to extract the event time from a record.
            .withTimestampAssigner( (e, ts) -> e.getTs())
            // Idleness timeout for inputs that stop receiving records.
            .withIdleness(Duration.ofSeconds(10));
    }

    /** Emits the string form of the list of all elements contained in a window. */
    private static class WindowContentsFunction
            extends ProcessAllWindowFunction<WaterSensor, String, TimeWindow>
    {
        @Override
        public void process(Context context, Iterable<WaterSensor> elements, Collector<String> out) throws Exception {
            // Output every element of the window as one list string.
            out.collect(MyUtil.parseToList(elements).toString());
        }
    }
}