题目:
我们要将收到的订单流与到账流进行数据匹配。
1、目录结构
2、到账流数据内容
3、到账流包装类
package Beans;
/**
 * Mutable bean describing one record of the receipt ("money arrived") stream.
 *
 * <p>Keeps a public no-argument constructor plus a getter/setter pair for every
 * field so that it can be handled as a plain JavaBean.
 */
public class ReceiveEvent {
    /** Transaction id carried by the receipt record. */
    private String txId;
    /** Payment channel carried by the receipt record. */
    private String payChannel;
    /** Time value of the record; the unit is not fixed by this class. */
    private Long timetemp;

    /** Creates an empty event; every field starts as {@code null}. */
    public ReceiveEvent() {
    }

    /**
     * Creates a fully populated event.
     *
     * @param txId       transaction id
     * @param payChannel payment channel
     * @param timetemp   time value of the record
     */
    public ReceiveEvent(String txId, String payChannel, Long timetemp) {
        this.txId = txId;
        this.payChannel = payChannel;
        this.timetemp = timetemp;
    }

    /** @return the transaction id, possibly {@code null} */
    public String getTxId() {
        return this.txId;
    }

    /** @param txId the transaction id to store */
    public void setTxId(String txId) {
        this.txId = txId;
    }

    /** @return the payment channel, possibly {@code null} */
    public String getPayChannel() {
        return this.payChannel;
    }

    /** @param payChannel the payment channel to store */
    public void setPayChannel(String payChannel) {
        this.payChannel = payChannel;
    }

    /** @return the time value, possibly {@code null} */
    public Long getTimetemp() {
        return this.timetemp;
    }

    /** @param timetemp the time value to store */
    public void setTimetemp(Long timetemp) {
        this.timetemp = timetemp;
    }

    /**
     * Renders all three fields; the two string fields are wrapped in single quotes
     * and {@code null} values are printed as the text {@code null}.
     */
    @Override
    public String toString() {
        return String.format(
                "ReceiveEvent{txId='%s', payChannel='%s', timetemp=%s}",
                txId, payChannel, timetemp);
    }
}
4、首先使用connect连接 并使用侧输出流输出异常数据
package Project;
import Beans.OrderEvent;
import Beans.ReceiveEvent;
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.CoProcessFunction;
import org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import java.net.URL;
/**
 * Flink job that matches order (payment) events against receipt events by
 * transaction id using {@code connect} + {@link CoProcessFunction}.
 *
 * <p>Matched pairs go to the main output; events whose counterpart does not
 * arrive before their timer fires are emitted on the two side outputs.
 */
public class TxPayMatch {
    /** Side-output tag for payments whose receipt never arrived in time. */
    private final static OutputTag<OrderEvent> unmatchedPays = new OutputTag<OrderEvent>("unmatched-pays"){};
    /** Side-output tag for receipts whose payment never arrived in time. */
    private final static OutputTag<ReceiveEvent> unmatchedReceipts = new OutputTag<ReceiveEvent>("unmatched-receipts"){};

    /**
     * Builds and runs the job: reads both CSV resources, assigns event-time
     * timestamps, connects the two keyed streams and prints the three outputs.
     *
     * @param args unused
     * @throws Exception if job construction or execution fails
     */
    public static void main(String[] args) throws Exception{
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        // Read the order log from the classpath.
        URL orderResource = TxPayMatch.class.getResource("/OrderLog.csv");
        DataStream<OrderEvent> orderEventStream = env.readTextFile(orderResource.getPath())
                .map(line -> {
                    String[] fields = line.split(",");
                    // Long.valueOf replaces the deprecated Long(String) constructor.
                    return new OrderEvent(Long.valueOf(fields[0]), fields[1], fields[2], Long.valueOf(fields[3]));
                })
                .assignTimestampsAndWatermarks(new AscendingTimestampExtractor<OrderEvent>() {
                    @Override
                    public long extractAscendingTimestamp(OrderEvent orderEvent) {
                        // The value is multiplied by 1000 to obtain the event-time timestamp.
                        return orderEvent.getTimestamp() * 1000L;
                    }
                })
                // Keep only events that carry a non-empty transaction id.
                .filter(data -> !"".equals(data.getTxId()));

        // Read the receipt log from the classpath.
        URL receiptResource = TxPayMatch.class.getResource("/ReceiptLog.csv");
        SingleOutputStreamOperator<ReceiveEvent> receiptEventStream = env.readTextFile(receiptResource.getPath())
                .map(line -> {
                    String[] fields = line.split(",");
                    return new ReceiveEvent(fields[0], fields[1], Long.valueOf(fields[2]));
                })
                .assignTimestampsAndWatermarks(new AscendingTimestampExtractor<ReceiveEvent>() {
                    @Override
                    public long extractAscendingTimestamp(ReceiveEvent receiveEvent) {
                        return receiveEvent.getTimetemp() * 1000L;
                    }
                });

        // Connect the two streams keyed by transaction id and run the matching logic.
        SingleOutputStreamOperator<Tuple2<OrderEvent,ReceiveEvent>> resultStream = orderEventStream.keyBy(OrderEvent::getTxId)
                .connect(receiptEventStream.keyBy(ReceiveEvent::getTxId))
                .process(new TxPayMatchDetect());

        resultStream.print("matched-pays");
        resultStream.getSideOutput(unmatchedPays).print("unmatched-pays");
        resultStream.getSideOutput(unmatchedReceipts).print("unmatched-receipts");
        env.execute("tx match detect job");
    }

    /**
     * Per-key matcher: buffers whichever side arrives first, emits the pair when
     * the other side arrives, and reports the buffered event as unmatched if its
     * timer fires first.
     */
    public static class TxPayMatchDetect extends CoProcessFunction<OrderEvent, ReceiveEvent, Tuple2<OrderEvent,ReceiveEvent>>{
        /** Seconds a buffered payment waits for its receipt. */
        private static final long PAY_WAIT_SECONDS = 5L;
        /** Seconds a buffered receipt waits for its payment. */
        private static final long RECEIPT_WAIT_SECONDS = 3L;

        // Keyed state holding the payment / receipt that has arrived and is still waiting.
        ValueState<OrderEvent> payState;
        ValueState<ReceiveEvent> receiveEventValueState;

        /** Obtains the two keyed value states from the runtime context. */
        @Override
        public void open(Configuration parameters) throws Exception {
            payState=getRuntimeContext().getState(new ValueStateDescriptor<OrderEvent>("pay",OrderEvent.class));
            receiveEventValueState=getRuntimeContext().getState(new ValueStateDescriptor<ReceiveEvent>("receipt",ReceiveEvent.class));
        }

        /**
         * Handles a payment event: emits the pair if the receipt is already
         * buffered, otherwise buffers the payment and starts its timeout.
         */
        @Override
        public void processElement1(OrderEvent pay, Context context, Collector<Tuple2<OrderEvent, ReceiveEvent>> collector) throws Exception {
            ReceiveEvent receiveEvent = receiveEventValueState.value();
            if(receiveEvent!=null){
                // The receipt arrived earlier: emit the match.
                collector.collect(new Tuple2<>(pay,receiveEvent));
                // Cancel the receipt's pending timer so it cannot fire later against
                // a newer event buffered under the same key.
                context.timerService().deleteEventTimeTimer(receiptDeadline(receiveEvent));
                payState.clear();
                receiveEventValueState.clear();
            }else{
                // No receipt yet: wait PAY_WAIT_SECONDS for it.
                context.timerService().registerEventTimeTimer(payDeadline(pay));
                payState.update(pay);
            }
        }

        /**
         * Handles a receipt event: emits the pair if the payment is already
         * buffered, otherwise buffers the receipt and starts its timeout.
         */
        @Override
        public void processElement2(ReceiveEvent receipt, Context context, Collector<Tuple2<OrderEvent, ReceiveEvent>> collector) throws Exception {
            OrderEvent pay = payState.value();
            if(pay!=null){
                // The payment arrived earlier: emit the match.
                collector.collect(new Tuple2<>(pay,receipt));
                // Cancel the payment's pending timer (see processElement1).
                context.timerService().deleteEventTimeTimer(payDeadline(pay));
                payState.clear();
                receiveEventValueState.clear();
            }else{
                // No payment yet: wait RECEIPT_WAIT_SECONDS (3s) for it.
                context.timerService().registerEventTimeTimer(receiptDeadline(receipt));
                receiveEventValueState.update(receipt);
            }
        }

        /**
         * Fires when a buffered event timed out: whichever state is still set is
         * sent to its side output, then both states are cleared.
         */
        @Override
        public void onTimer(long timestamp, OnTimerContext ctx, Collector<Tuple2<OrderEvent, ReceiveEvent>> out) throws Exception {
            OrderEvent pendingPay = payState.value();
            if(pendingPay!=null){
                ctx.output(unmatchedPays,pendingPay);
            }
            ReceiveEvent pendingReceipt = receiveEventValueState.value();
            if(pendingReceipt!=null){
                ctx.output(unmatchedReceipts,pendingReceipt);
            }
            payState.clear();
            receiveEventValueState.clear();
        }

        /** Event-time timestamp at which a buffered payment is given up on. */
        private static long payDeadline(OrderEvent pay) {
            return (pay.getTimestamp()+PAY_WAIT_SECONDS)*1000L;
        }

        /** Event-time timestamp at which a buffered receipt is given up on. */
        private static long receiptDeadline(ReceiveEvent receipt) {
            return (receipt.getTimetemp()+RECEIPT_WAIT_SECONDS)*1000L;
        }
    }
}
5、使用intervalJoin 进行区间连接
package Project;
import Beans.OrderEvent;
import Beans.ReceiveEvent;
import org.apache.flink.api.java.tuple.Tuple2;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.co.ProcessJoinFunction;
import org.apache.flink.streaming.api.functions.timestamps.AscendingTimestampExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
import java.net.URL;
/**
 * Flink job that matches order (payment) events against receipt events by
 * transaction id using an event-time interval join.
 *
 * <p>Only matched pairs are produced by this job; no side outputs are taken
 * from the join result.
 */
public class TxPayMatchByJoin {
    /**
     * Builds and runs the job: reads both CSV resources, assigns event-time
     * timestamps, interval-joins the keyed streams and prints the pairs.
     *
     * @param args unused
     * @throws Exception if job construction or execution fails
     */
    public static void main(String[] args) throws Exception{
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);
        env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);

        // Read the order log from the classpath.
        URL orderResource = TxPayMatchByJoin.class.getResource("/OrderLog.csv");
        DataStream<OrderEvent> orderEventStream = env.readTextFile(orderResource.getPath())
                .map(line -> {
                    String[] fields = line.split(",");
                    // Long.valueOf replaces the deprecated Long(String) constructor.
                    return new OrderEvent(Long.valueOf(fields[0]), fields[1], fields[2], Long.valueOf(fields[3]));
                })
                .assignTimestampsAndWatermarks(new AscendingTimestampExtractor<OrderEvent>() {
                    @Override
                    public long extractAscendingTimestamp(OrderEvent orderEvent) {
                        // The value is multiplied by 1000 to obtain the event-time timestamp.
                        return orderEvent.getTimestamp() * 1000L;
                    }
                })
                // Keep only events that carry a non-empty transaction id.
                .filter(data -> !"".equals(data.getTxId()));

        // Read the receipt log from the classpath.
        URL receiptResource = TxPayMatchByJoin.class.getResource("/ReceiptLog.csv");
        SingleOutputStreamOperator<ReceiveEvent> receiptEventStream = env.readTextFile(receiptResource.getPath())
                .map(line -> {
                    String[] fields = line.split(",");
                    return new ReceiveEvent(fields[0], fields[1], Long.valueOf(fields[2]));
                })
                .assignTimestampsAndWatermarks(new AscendingTimestampExtractor<ReceiveEvent>() {
                    @Override
                    public long extractAscendingTimestamp(ReceiveEvent receiveEvent) {
                        return receiveEvent.getTimetemp() * 1000L;
                    }
                });

        // Interval-join the two keyed streams: a receipt joins an order when its
        // timestamp lies between 3 seconds before and 5 seconds after the order's.
        SingleOutputStreamOperator<Tuple2<OrderEvent,ReceiveEvent>> resultStream = orderEventStream
                .keyBy(OrderEvent::getTxId)
                .intervalJoin(receiptEventStream.keyBy(ReceiveEvent::getTxId))
                .between(Time.seconds(-3), Time.seconds(5))
                .process(new TxPayMatchDetectByJoin());

        resultStream.print();
        env.execute("tx pay match by join");
    }

    /** Join function that forwards every joined (order, receipt) pair downstream. */
    public static class TxPayMatchDetectByJoin extends ProcessJoinFunction<OrderEvent, ReceiveEvent, Tuple2<OrderEvent,ReceiveEvent>>{
        /**
         * Called once per pair that satisfies the interval condition; emits the pair.
         * Without this emit the join produces no output at all.
         */
        @Override
        public void processElement(OrderEvent orderEvent, ReceiveEvent receiveEvent, Context context, Collector<Tuple2<OrderEvent, ReceiveEvent>> collector) throws Exception {
            collector.collect(new Tuple2<>(orderEvent, receiveEvent));
        }
    }
}