目录
Flink自定义水印
package com.home.flink.chapter07;
import com.home.flink.bean.WaterSensor;
import com.home.flink.util.homeUtil;
import org.apache.flink.api.common.eventtime.*;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.windowing.ProcessWindowFunction;
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.api.windowing.windows.TimeWindow;
import org.apache.flink.util.Collector;
import java.util.List;
public class Flink_Watermark_Custom {

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setInteger("rest.port", 20000);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(conf);
        env.setParallelism(1);
        // Periodic watermark callback fires every 3 seconds.
        env.getConfig().setAutoWatermarkInterval(3000);

        env.socketTextStream("hadoop162", 9999)
            .map(line -> {
                String[] fields = line.split(",");
                return new WaterSensor(fields[0], Long.valueOf(fields[1]), Integer.valueOf(fields[2]));
            })
            .assignTimestampsAndWatermarks(
                WatermarkStrategy
                    // WatermarkGeneratorSupplier is a functional interface, so a
                    // lambda replaces the anonymous WatermarkStrategy subclass.
                    .<WaterSensor>forGenerator(context -> new MyPeriodWMG(3))
                    .withTimestampAssigner((sensor, recordTs) -> sensor.getTs())
            )
            .keyBy(WaterSensor::getId)
            .window(TumblingEventTimeWindows.of(Time.seconds(5)))
            .process(new ProcessWindowFunction<WaterSensor, String, String, TimeWindow>() {
                @Override
                public void process(String key,
                                    Context ctx,
                                    Iterable<WaterSensor> elements,
                                    Collector<String> out) throws Exception {
                    List<WaterSensor> sensors = homeUtil.toList(elements);
                    out.collect(key + " " + sensors + " " + ctx.window());
                }
            })
            .print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Custom watermark generator. Although registered as a periodic generator,
     * it emits a watermark on every event ("punctuated" behavior); the periodic
     * callback is intentionally left without an emission for this demo.
     */
    public static class MyPeriodWMG implements WatermarkGenerator<WaterSensor> {
        // Maximum allowed out-of-orderness, in milliseconds.
        private final long maxDelay;
        // Highest event timestamp observed so far.
        private long maxTs;

        public MyPeriodWMG(long maxDelaySeconds) {
            this.maxDelay = maxDelaySeconds * 1000;
            // Seed high enough that (maxTs - maxDelay - 1) cannot underflow Long.MIN_VALUE.
            this.maxTs = Long.MIN_VALUE + this.maxDelay + 1;
        }

        @Override
        public void onEvent(WaterSensor event, long eventTimestamp, WatermarkOutput output) {
            System.out.println("MyPeriodWMG.onEvent");
            // Track the running maximum event timestamp.
            if (eventTimestamp > maxTs) {
                maxTs = eventTimestamp;
            }
            // Watermark = max timestamp minus allowed lateness (exclusive bound).
            output.emitWatermark(new Watermark(maxTs - maxDelay - 1));
        }

        @Override
        public void onPeriodicEmit(WatermarkOutput output) {
            System.out.println("MyPeriodWMG.onPeriodicEmit");
            // No emission here on purpose: this demo emits per event in onEvent.
        }
    }
}
Flink定时器Timer
package com.home.flink.chapter07.timer;
import com.home.flink.bean.WaterSensor;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;
import java.time.Duration;
/**
 * Raises an alert when a sensor's water level rises continuously for 5 seconds
 * (event time). A timer is (re)registered whenever the level fails to rise;
 * if the timer survives 5s untouched, the level rose the whole time.
 */
public class Flink01_Timer_Project {

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setInteger("rest.port", 20000);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(conf);
        env.setParallelism(1);
        SingleOutputStreamOperator<String> main = env
            .socketTextStream("hadoop162", 9999)
            .map(line -> {
                String[] data = line.split(",");
                // Input timestamps are seconds; Flink event time is milliseconds.
                return new WaterSensor(data[0], Long.valueOf(data[1]) * 1000, Integer.valueOf(data[2]));
            })
            .assignTimestampsAndWatermarks(
                WatermarkStrategy
                    .<WaterSensor>forBoundedOutOfOrderness(Duration.ofSeconds(3))
                    .withTimestampAssigner((ws, ts) -> ws.getTs())
            )
            .keyBy(WaterSensor::getId)
            .process(new KeyedProcessFunction<String, WaterSensor, String>() {
                // FIX: the original kept timerTs/isFirst/lastVc in plain instance
                // fields, which are shared by ALL keys in the same subtask — one
                // sensor's readings could delete or reset another sensor's timer.
                // Keyed ValueState is scoped per key, which is what the logic needs.
                private ValueState<Long> timerTsState;   // pending timer ts; null = no timer (acts as the old isFirst flag)
                private ValueState<Integer> lastVcState; // previous water level for this key

                @Override
                public void open(Configuration parameters) throws Exception {
                    timerTsState = getRuntimeContext().getState(
                        new ValueStateDescriptor<>("timerTs", Long.class));
                    lastVcState = getRuntimeContext().getState(
                        new ValueStateDescriptor<>("lastVc", Integer.class));
                }

                @Override
                public void processElement(WaterSensor value,
                                           Context ctx,
                                           Collector<String> out) throws Exception {
                    Long timerTs = timerTsState.value();
                    if (timerTs == null) {
                        // First element for this key (or the previous timer already
                        // fired): start a fresh 5s countdown.
                        timerTsState.update(registerTimer(ctx));
                    } else if (value.getVc() <= lastVcState.value()) {
                        // Level did not rise: cancel and restart the countdown.
                        ctx.timerService().deleteEventTimeTimer(timerTs);
                        timerTsState.update(registerTimer(ctx));
                    }
                    lastVcState.update(value.getVc());
                }

                // Registers an event-time timer 5s after the current element and returns its timestamp.
                private long registerTimer(Context ctx) {
                    long ts = ctx.timestamp() + 5000;
                    ctx.timerService().registerEventTimeTimer(ts);
                    return ts;
                }

                @Override
                public void onTimer(long timestamp,
                                    OnTimerContext ctx,
                                    Collector<String> out) throws Exception {
                    out.collect(ctx.getCurrentKey() + " 5s内水位连续上升, 预警...");
                    // Clearing the timer state makes the next element start over
                    // (equivalent to the original isFirst = true).
                    timerTsState.clear();
                }
            });
        main.print();
        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
Flink广播状态
package com.home.flink.chapter07.state;
import org.apache.flink.api.common.state.BroadcastState;
import org.apache.flink.api.common.state.MapStateDescriptor;
import org.apache.flink.api.common.state.ReadOnlyBroadcastState;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.BroadcastStream;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.BroadcastProcessFunction;
import org.apache.flink.util.Collector;
public class Flink02_State_BCState {

    public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.setInteger("rest.port", 20000);
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment(conf);
        env.setParallelism(2);

        // One descriptor shared by the writer (broadcast side) and the readers (data side).
        MapStateDescriptor<String, String> stateDescriptor =
            new MapStateDescriptor<>("controlState", String.class, String.class);

        DataStreamSource<String> dataStream = env.socketTextStream("hadoop162", 8888);
        DataStreamSource<String> controlStream = env.socketTextStream("hadoop162", 9999);

        // Step 1: turn the control stream into a broadcast stream.
        BroadcastStream<String> broadcastStream = controlStream.broadcast(stateDescriptor);

        // Step 2: connect the data stream with the broadcast stream.
        dataStream
            .connect(broadcastStream)
            .process(new BroadcastProcessFunction<String, String, String>() {

                // Invoked for each data-stream element; reads the (read-only) broadcast state.
                @Override
                public void processElement(String value,
                                           ReadOnlyContext ctx,
                                           Collector<String> out) throws Exception {
                    ReadOnlyBroadcastState<String, String> state = ctx.getBroadcastState(stateDescriptor);
                    String flag = state.get("logicSwitch");
                    String route;
                    if ("a".equals(flag)) {
                        route = "使用 1 号逻辑处理数据...";
                    } else if ("b".equals(flag)) {
                        route = "使用 2 号逻辑处理数据...";
                    } else {
                        route = "使用 默认 号逻辑处理数据...";
                    }
                    out.collect(route);
                }

                // Invoked for each control-stream element; the value written into the
                // broadcast state becomes visible to every parallel subtask.
                @Override
                public void processBroadcastElement(String value,
                                                    Context ctx,
                                                    Collector<String> out) throws Exception {
                    ctx.getBroadcastState(stateDescriptor).put("logicSwitch", value);
                }
            })
            .print();

        try {
            env.execute();
        } catch (Exception e) {
            e.printStackTrace();
        }
    }
}
/*
两个流: 一个流一般叫数据流, 另外一个有些配置信息(控制流)
把控制流做成广播流, 与数据流进行connect, 然后控制流中的数据就在一个广播状态中, 数据流就可以读取到这个广播状态
*/