1、代码示例
import org.apache.flink.api.common.eventtime.SerializableTimestampAssigner;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.state.ListState;
import org.apache.flink.api.common.state.ListStateDescriptor;
import org.apache.flink.api.common.state.StateTtlConfig;
import org.apache.flink.api.common.typeinfo.TypeHint;
import org.apache.flink.api.common.typeinfo.TypeInformation;
import org.apache.flink.api.java.tuple.Tuple3;
import org.apache.flink.api.java.tuple.Tuple5;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.datastream.DataStreamSource;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.KeyedCoProcessFunction;
import org.apache.flink.util.Collector;
import java.time.Duration;
/**
 * Demo job that emulates an event-time LEFT JOIN of two socket text streams with a
 * {@link KeyedCoProcessFunction}.
 *
 * <p>Both inputs carry comma-separated lines of the form {@code key,value,timestampMillis}.
 * The left stream is read from {@code localhost:8888}, the right stream from
 * {@code localhost:9999}. Each stream is keyed by its first field, every element is buffered in
 * keyed list state, and an event-time timer is registered per element. When a timer fires, every
 * buffered left element of that key is compared with every buffered right element of the same
 * key; pairs whose second fields are equal are emitted, and a left element without any match is
 * emitted once with an empty right value and a right timestamp of {@code 0}.
 */
public class _02_CoProcessFunction {

    /** Offset added to an element's event timestamp before the timer is aligned, in milliseconds. */
    private static final long TIMER_DELAY_MS = 5000L;

    /** Timer timestamps are truncated to a multiple of this many milliseconds (whole seconds). */
    private static final long TIMER_GRANULARITY_MS = 1000L;

    /**
     * Builds the topology and runs the job.
     *
     * @param args unused
     * @throws Exception if job execution fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Parallelism is limited for testing; in production, idle sources need to be configured.
        env.setParallelism(2);
        env.disableOperatorChaining();

        // Event time requires both a watermark strategy and a timestamp assigner on each input.
        SingleOutputStreamOperator<Tuple3<String, String, Long>> watermarkLeft =
                parseWithWatermarks(env.socketTextStream("localhost", 8888));
        SingleOutputStreamOperator<Tuple3<String, String, Long>> watermarkRight =
                parseWithWatermarks(env.socketTextStream("localhost", 9999));

        /*
         * Sample session recorded by the original author. The console messages are paraphrased
         * in English here; the program itself prints them in Chinese (see the println calls in
         * LeftJoinFunction). "Co-Keyed-Process-Watermark" is the watermark observed on the
         * co-process operator after each step.
         *
         * Step 1 - left input:
         *   a,1,1718089200000
         *   b,2,1718089200000
         *   c,3,1718089200000
         *   left timer registered => 1718089205000   (x3)
         *   Co-Keyed-Process-Watermark: No Watermark
         *
         * Step 2 - right input:
         *   a,1,1718089201000
         *   b,2,1718089201000
         *   c,3,1718089201000
         *   right timer registered => 1718089206000  (x3)
         *   Co-Keyed-Process-Watermark: 1718089199999
         *
         * Step 3 - left input:
         *   a,4,1718089202000
         *   b,5,1718089202000
         *   c,6,1718089202000
         *   left timer registered => 1718089207000   (x3)
         *   Co-Keyed-Process-Watermark: 1718089200999
         *
         * Step 4 - right input:
         *   a,7,1718089203000
         *   b,8,1718089203000
         *   c,9,1718089203000
         *   right timer registered => 1718089208000  (x3)
         *   Co-Keyed-Process-Watermark: 1718089201999
         *
         * Step 5 - the same three lines sent to both left and right:
         *   a,1,1718089205001  [timer had not fired yet - element is in state and is joined]
         *   b,2,1718089205001  [timer fires - element is in state and is joined]
         *   c,3,1718089205001  [the timer had already fired when the right-stream c,3 arrived,
         *                       so that element did not take part in the join]
         *   left timer registered  => 1718089210000  (x3)
         *   right timer registered => 1718089210000  (x3)
         *
         *   timer fired: key=a, watermark=1718089205000, timestamp=1718089205000
         *   timer fired: key=b, watermark=1718089205000, timestamp=1718089205000
         *   timer fired: key=c, watermark=1718089205000, timestamp=1718089205000
         *
         *   2> (a,1,1,1718089200000,1718089201000)
         *   2> (a,1,1,1718089205001,1718089201000)
         *   2> (a,1,1,1718089200000,1718089205001)
         *   2> (a,1,1,1718089205001,1718089205001)
         *   2> (a,4,,1718089202000,0)
         *
         *   1> (b,2,2,1718089200000,1718089201000)
         *   1> (b,2,2,1718089205001,1718089201000)
         *   1> (b,2,2,1718089200000,1718089205001)
         *   1> (b,2,2,1718089205001,1718089205001)
         *   1> (b,5,,1718089202000,0)
         *
         *   1> (c,3,3,1718089200000,1718089201000)
         *   1> (c,3,3,1718089205001,1718089201000)
         *   1> (c,6,,1718089202000,0)
         *
         *   Co-Keyed-Process-Watermark: 1718089205000
         */
        watermarkLeft.keyBy(e -> e.f0)
                .connect(watermarkRight.keyBy(e -> e.f0))
                .process(new LeftJoinFunction())
                .print();

        env.execute();
    }

    /**
     * Parses raw {@code key,value,timestampMillis} lines into tuples and assigns event-time
     * timestamps and watermarks with zero allowed out-of-orderness.
     *
     * <p>A line with fewer than three comma-separated fields, or whose third field is not a
     * valid {@code long}, makes the map function throw
     * ({@link ArrayIndexOutOfBoundsException} / {@link NumberFormatException}).
     *
     * @param lines raw text lines read from a socket
     * @return the parsed stream; the third tuple field is used as the event timestamp
     */
    private static SingleOutputStreamOperator<Tuple3<String, String, Long>> parseWithWatermarks(
            DataStreamSource<String> lines) {
        SingleOutputStreamOperator<Tuple3<String, String, Long>> parsed =
                lines.map(new MapFunction<String, Tuple3<String, String, Long>>() {
                    @Override
                    public Tuple3<String, String, Long> map(String input) throws Exception {
                        String[] fields = input.split(",");
                        return new Tuple3<>(fields[0], fields[1], Long.parseLong(fields[2]));
                    }
                });

        return parsed.assignTimestampsAndWatermarks(
                WatermarkStrategy.<Tuple3<String, String, Long>>forBoundedOutOfOrderness(Duration.ofSeconds(0))
                        .withTimestampAssigner(new SerializableTimestampAssigner<Tuple3<String, String, Long>>() {
                            @Override
                            public long extractTimestamp(Tuple3<String, String, Long> input, long recordTimestamp) {
                                return input.f2;
                            }
                        }));
    }

    /**
     * Buffers both inputs per key and emits left-join results whenever an event-time timer fires.
     *
     * <p>There is deliberately no {@code close()} override that clears the list state: keyed
     * state is bound to the current key, and {@code close()} is not invoked in the context of a
     * key, so clearing state there is not a valid cleanup.
     *
     * <p>NOTE(review): buffered elements are never removed in this class. Every later timer of
     * the same key therefore iterates over (and re-emits results for) all elements buffered so
     * far, and the state keeps growing. Clear the buffers in {@code onTimer} or enable state TTL
     * if that is not the intended semantics.
     */
    private static final class LeftJoinFunction extends KeyedCoProcessFunction<
            String, Tuple3<String, String, Long>, Tuple3<String, String, Long>,
            Tuple5<String, String, String, Long, Long>> {

        private static final long serialVersionUID = 1L;

        /** Elements of the first (left) input buffered for the current key. */
        private transient ListState<Tuple3<String, String, Long>> leftListState;

        /** Elements of the second (right) input buffered for the current key. */
        private transient ListState<Tuple3<String, String, Long>> rightListState;

        /** Creates the two keyed list-state handles named {@code "left"} and {@code "right"}. */
        @Override
        public void open(Configuration parameters) throws Exception {
            // A state TTL could be set to keep the state from growing without bound; TTL only
            // supports processing time. Sketch (disabled):
            // StateTtlConfig stateTtlConfig = StateTtlConfig.newBuilder(Duration.ofSeconds(8)).build();
            // leftListDesc.enableTimeToLive(stateTtlConfig);
            // rightListDesc.enableTimeToLive(stateTtlConfig);
            leftListState = getRuntimeContext().getListState(bufferDescriptor("left"));
            rightListState = getRuntimeContext().getListState(bufferDescriptor("right"));
        }

        /** Buffers a left element and registers its join timer. */
        @Override
        public void processElement1(Tuple3<String, String, Long> left, Context context,
                Collector<Tuple5<String, String, String, Long, Long>> collector) throws Exception {
            leftListState.add(left);
            registerJoinTimer("left", context);
        }

        /** Buffers a right element and registers its join timer. */
        @Override
        public void processElement2(Tuple3<String, String, Long> right, Context context,
                Collector<Tuple5<String, String, String, Long, Long>> collector) throws Exception {
            rightListState.add(right);
            registerJoinTimer("right", context);
        }

        /**
         * Joins the buffered elements of the timer's key.
         *
         * <p>For every buffered left element, one result is emitted per buffered right element
         * whose second field equals the left element's second field. A left element without any
         * match is emitted once with {@code ""} as right value and {@code 0} as right timestamp.
         *
         * @param timestamp the timestamp of the firing timer
         * @param ctx timer context giving access to the current key and the timer service
         * @param out collector for the join results
         */
        @Override
        public void onTimer(long timestamp, OnTimerContext ctx,
                Collector<Tuple5<String, String, String, Long, Long>> out) throws Exception {
            String currentKey = ctx.getCurrentKey();
            long currentWatermark = ctx.timerService().currentWatermark();
            System.out.println("定时器触发时,当前的 Key=>" + currentKey + ",当前的 Watermark=>" + currentWatermark + ",当前的 timestamp=>" + timestamp);

            // The right buffer does not change while this timer runs, so fetch it once.
            Iterable<Tuple3<String, String, Long>> rightTuples = rightListState.get();
            for (Tuple3<String, String, Long> leftTuple : leftListState.get()) {
                boolean joined = false;
                for (Tuple3<String, String, Long> rightTuple : rightTuples) {
                    if (leftTuple.f1.equals(rightTuple.f1)) {
                        joined = true;
                        out.collect(new Tuple5<>(leftTuple.f0, leftTuple.f1, rightTuple.f1, leftTuple.f2, rightTuple.f2));
                    }
                }
                // Emulate LEFT JOIN: an unmatched left element is still emitted.
                if (!joined) {
                    out.collect(new Tuple5<>(leftTuple.f0, leftTuple.f1, "", leftTuple.f2, 0L));
                }
            }
        }

        /**
         * Registers an event-time timer for the element currently being processed and prints the
         * chosen timer timestamp.
         *
         * @param side label of the input the element came from, used as prefix of the log line
         * @param context context of the element being processed; its timestamp must be set
         */
        private void registerJoinTimer(String side, Context context) {
            long timerTimestamp = alignedTimerTimestamp(context.timestamp());
            System.out.println(side + " 流数据创建的定时器时间为=>" + timerTimestamp);
            context.timerService().registerEventTimeTimer(timerTimestamp);
        }

        /**
         * Computes the timer timestamp for an element: the event timestamp plus
         * {@link #TIMER_DELAY_MS}, truncated to a multiple of {@link #TIMER_GRANULARITY_MS}.
         * For example {@code 1718089200000 -> 1718089205000} and
         * {@code 1718089205001 -> 1718089210000}.
         *
         * @param eventTimestamp event timestamp of the element in milliseconds
         * @return the aligned timer timestamp in milliseconds
         */
        private static long alignedTimerTimestamp(long eventTimestamp) {
            return (eventTimestamp + TIMER_DELAY_MS) / TIMER_GRANULARITY_MS * TIMER_GRANULARITY_MS;
        }

        /**
         * Creates the descriptor of one buffer state.
         *
         * @param name state name
         * @return a list-state descriptor for {@code Tuple3<String, String, Long>} elements
         */
        private static ListStateDescriptor<Tuple3<String, String, Long>> bufferDescriptor(String name) {
            return new ListStateDescriptor<>(name, TypeInformation.of(new TypeHint<Tuple3<String, String, Long>>() {
            }));
        }
    }
}
2、测试用例
left-1
a,1,1718089200000
b,2,1718089200000
c,3,1718089200000
left 流数据创建的定时器时间为=>1718089205000
left 流数据创建的定时器时间为=>1718089205000
left 流数据创建的定时器时间为=>1718089205000
Co-Keyed-Process-Watermark:No Watermark
right-2
a,1,1718089201000
b,2,1718089201000
c,3,1718089201000
right 流数据创建的定时器时间为=>1718089206000
right 流数据创建的定时器时间为=>1718089206000
right 流数据创建的定时器时间为=>1718089206000
Co-Keyed-Process-Watermark:1718089199999
left-3
a,4,1718089202000
b,5,1718089202000
c,6,1718089202000
left 流数据创建的定时器时间为=>1718089207000
left 流数据创建的定时器时间为=>1718089207000
left 流数据创建的定时器时间为=>1718089207000
Co-Keyed-Process-Watermark:1718089200999
right-4
a,7,1718089203000
b,8,1718089203000
c,9,1718089203000
right 流数据创建的定时器时间为=>1718089208000
right 流数据创建的定时器时间为=>1718089208000
right 流数据创建的定时器时间为=>1718089208000
Co-Keyed-Process-Watermark:1718089201999
left-right-5
a,1,1718089205001[定时器还未触发-在状态中参与计算]
b,2,1718089205001[定时器触发-在状态中参与计算]
c,3,1718089205001[right-stream 的 c-3 到达时,定时器已经触发了,所以没有参与计算]
left 流数据创建的定时器时间为=>1718089210000
left 流数据创建的定时器时间为=>1718089210000
left 流数据创建的定时器时间为=>1718089210000
right 流数据创建的定时器时间为=>1718089210000
right 流数据创建的定时器时间为=>1718089210000
right 流数据创建的定时器时间为=>1718089210000
定时器触发时,当前的 Key=>a,当前的 Watermark=>1718089205000,当前的 timestamp=>1718089205000
定时器触发时,当前的 Key=>b,当前的 Watermark=>1718089205000,当前的 timestamp=>1718089205000
定时器触发时,当前的 Key=>c,当前的 Watermark=>1718089205000,当前的 timestamp=>1718089205000
2> (a,1,1,1718089200000,1718089201000)
2> (a,1,1,1718089205001,1718089201000)
2> (a,1,1,1718089200000,1718089205001)
2> (a,1,1,1718089205001,1718089205001)
2> (a,4,,1718089202000,0)
1> (b,2,2,1718089200000,1718089201000)
1> (b,2,2,1718089205001,1718089201000)
1> (b,2,2,1718089200000,1718089205001)
1> (b,2,2,1718089205001,1718089205001)
1> (b,5,,1718089202000,0)
1> (c,3,3,1718089200000,1718089201000)
1> (c,3,3,1718089205001,1718089201000)
1> (c,6,,1718089202000,0)
Co-Keyed-Process-Watermark:1718089205000