0. GitHub project link:
1. Case: Factory-Exit Timeout Warning - Implementation Approach
The implementation uses Flink CEP; for now it ignores other interfering conditions and assumes each employee swipes out and back in only once.
/**
Solution:
Use Flink CEP to pattern-match the event stream, with a timeout (90 minutes).
Concretely (a simple approach that only considers one out/in cycle per employee, i.e. door_status changing from 2 to 1):
For one employee's card-swipe events on a given day, keyed by employee id:
the employee enters the factory (in, door_status=1)
the employee leaves mid-shift (out, door_status=2)
the employee re-enters (in, door_status=1)
the employee leaves again (out, door_status=2); if more than 90 minutes pass after a swipe-out with no swipe-in detected, raise a timeout warning.
*/
/**
 * Starting from an employee's swipe-out, compare against the current time; once the gap exceeds 90 minutes, check for that employee's swipe-in. If none is detected, raise a factory-exit warning.
 * *****
 * 1. CEP (Complex Event Processing)
 * *****
 * 1.1 Define a Pattern to catch cases where the gap between "out door" and "in door" exceeds 90 minutes
 * 1.2 Emit warnings for these, i.e. obtain the stream of matches that timed out
 * *****
 */
1.1 Define a card-swipe event class
- 1.IN: DataSource -> DataStream -> Transformations -> DataStream -> keyBy ->KeyedStream
- 2.Pattern:Pattern.begin.where.next.where…within(Time windowTime)
- 3.PatternStream:CEP.pattern(KeyedStream,Pattern)
- 4.OutputTag:new OutputTag(…)
- 5.SingleOutputStreamOperator: PatternStream.flatSelect(OutputTag,PatternFlatTimeoutFunction,PatternFlatSelectFunction)
- 6.DataStream:SingleOutputStreamOperator.getSideOutput(OutputTag)
- 7.OUT:DataStream -> Transformations -> DataStream -> DataSink
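Condensing those seven steps, a minimal pipeline skeleton could look like the sketch below. It assumes `env` and the Kafka `source` are already configured, and it references the AccessEvent, AccessTimedOut, and FlatSelect classes defined later in this section; the production window would be Time.minutes(90). The AccessEvent entity itself is defined next.
// 1. IN: source -> DataStream, keyed by employee id
KeyedStream<AccessEvent, String> keyed = env.addSource(source)
        .keyBy(AccessEvent::getEmployee_sys_no);
// 2. Pattern: swipe-out ("2") strictly followed by swipe-in ("1") within the window
Pattern<AccessEvent, AccessEvent> pattern = Pattern.<AccessEvent>begin("outdoor")
        .where(new SimpleCondition<AccessEvent>() {
            @Override
            public boolean filter(AccessEvent e) {
                return "2".equals(e.getDoor_status());
            }
        })
        .next("indoor")
        .where(new SimpleCondition<AccessEvent>() {
            @Override
            public boolean filter(AccessEvent e) {
                return "1".equals(e.getDoor_status());
            }
        })
        .within(Time.minutes(90));
// 3. PatternStream
PatternStream<AccessEvent> patternStream = CEP.pattern(keyed, pattern);
// 4. OutputTag marking the timed-out partial matches
OutputTag<AccessEvent> timedOut = new OutputTag<AccessEvent>("timedout") {};
// 5. flatSelect with a timeout function and a select function
SingleOutputStreamOperator<AccessEvent> matched =
        patternStream.flatSelect(timedOut, new AccessTimedOut(), new FlatSelect());
// 6./7. OUT: the side output carries the overdue swipe-outs
matched.getSideOutput(timedOut).print();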
// AccessEvent: the card-swipe access event
package com.events;
import lombok.NoArgsConstructor;
import java.io.Serializable;
import java.util.Objects;
/**
 * AccessEvent: entity class for a card-swipe access event
 * */
//@Data
//@AllArgsConstructor
@NoArgsConstructor
public class AccessEvent implements Serializable {
public Integer id;
public Integer door_id;
public String door_status;
public Integer event_type;
public String employee_sys_no;
public String datetime;
public AccessEvent(AccessEvent other) {
// copy constructor: duplicate all fields
this.id = other.id;
this.door_id = other.door_id;
this.door_status = other.door_status;
this.event_type = other.event_type;
this.employee_sys_no = other.employee_sys_no;
this.datetime = other.datetime;
}
public int getId() {
return id;
}
public void setId(int id) {
this.id = id;
}
public int getDoor_id() {
return door_id;
}
public void setDoor_id(int door_id) {
this.door_id = door_id;
}
public String getDoor_status() {
return door_status;
}
public void setDoor_status(String door_status) {
this.door_status = door_status;
}
public int getEvent_type() {
return event_type;
}
public void setEvent_type(int event_type) {
this.event_type = event_type;
}
public String getEmployee_sys_no() {
return employee_sys_no;
}
public void setEmployee_sys_no(String employee_sys_no) {
this.employee_sys_no = employee_sys_no;
}
public String getDatetime() {
return datetime;
}
public void setDatetime(String datetime) {
this.datetime = datetime;
}
public AccessEvent(int id, int door_id, String door_status, int event_type, String employee_sys_no, String datetime) {
this.id = id;
this.door_id = door_id;
this.door_status = door_status;
this.event_type = event_type;
this.employee_sys_no = employee_sys_no;
this.datetime = datetime;
}
@Override
public boolean equals(Object o) {
if (this == o) return true;
if (o == null || getClass() != o.getClass()) return false;
AccessEvent that = (AccessEvent) o;
// fields are boxed/String types, so compare with Objects.equals rather than ==
return Objects.equals(id, that.id) &&
Objects.equals(door_id, that.door_id) &&
Objects.equals(door_status, that.door_status) &&
Objects.equals(event_type, that.event_type) &&
Objects.equals(employee_sys_no, that.employee_sys_no) &&
Objects.equals(datetime, that.datetime);
}
@Override
public int hashCode() {
return Objects.hash(id, door_id, door_status, event_type, employee_sys_no, datetime);
}
@Override
public String toString() {
return "AccessEvent{" +
"id=" + id +
", door_id=" + door_id +
", door_status=" + door_status +
", event_type=" + event_type +
", employee_sys_no=" + employee_sys_no +
", datetime='" + datetime + '\'' +
'}';
}
}
1.2 Define an event pattern (Pattern)
/**
 * Define the event pattern (Pattern)
 * */
Pattern<AccessEvent,AccessEvent> warningPattern=Pattern.<AccessEvent>begin("outdoor")
.where(new SimpleCondition<AccessEvent>() {
private static final long serialVersionUID = -6847788055093903603L;
@Override
public boolean filter(AccessEvent accessEvent) throws Exception {
return accessEvent.getDoor_status().equals("2");
}
})
.next("indoor").where(new SimpleCondition<AccessEvent>() {
@Override
public boolean filter(AccessEvent accessEvent) throws Exception {
return accessEvent.getDoor_status().equals("1");
}
})
.within(Time.seconds(10)).times(1); // for easier testing, the window is set to 10s instead of 90 minutes
/**
A pattern node's quantifier can be configured: a fixed number of occurrences (times), one or more occurrences (oneOrMore), or a given number of occurrences or more (timesOrMore).
*/
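As a quick illustration (a sketch, not part of the warning pattern above), these quantifiers attach to a pattern node like this:
// Sketch: quantifiers on a single pattern node (illustrative only)
Pattern<AccessEvent, AccessEvent> quantified = Pattern.<AccessEvent>begin("swipes")
        .where(new SimpleCondition<AccessEvent>() {
            @Override
            public boolean filter(AccessEvent e) {
                return "2".equals(e.getDoor_status());
            }
        })
        .times(3);          // exactly 3 occurrences
        // .times(2, 4);    // between 2 and 4 occurrences
        // .oneOrMore();    // one or more occurrences
        // .timesOrMore(2); // two or more occurrences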
1.3 Build the pattern stream (pattern-match output)
PatternStream<AccessEvent> accessEventPatternStream=CEP.pattern(dataStreamKeyBy,warningPattern); // match per employee id (the stream is keyed by it)
1.4 Use side output to get the timeout stream
/**
Create an OutputTag and use a side output to obtain the stream of events that timed out without matching.
*/
OutputTag<AccessEvent> outputTag=new OutputTag<AccessEvent>("timedout"){
private static final long serialVersionUID = 773503794597666247L;
};
SingleOutputStreamOperator<AccessEvent> timeout=accessEventPatternStream.flatSelect(
outputTag,
new AccessTimedOut(),
new FlatSelect()
);
/**
 * Collect the timed-out events
 * */
public static class AccessTimedOut implements PatternFlatTimeoutFunction<AccessEvent,AccessEvent> {
private static final long serialVersionUID = -4214641891396057732L;
@Override
public void timeout(Map<String, List<AccessEvent>> pattern, long timeStamp, Collector<AccessEvent> out) throws Exception {
if (null!=pattern.get("outdoor")){
for (AccessEvent accessEvent:pattern.get("outdoor")){
System.out.println("timeout outdoor:"+accessEvent.getEmployee_sys_no());
out.collect(accessEvent);
}
}
//the match timed out before any "indoor" event arrived, so pattern.get("indoor") is null here
System.out.println("timeout end"+pattern.get("indoor"));
}
}
Complete Demo
package com;
import com.events.AccessEvent;
import com.utils.JsonFilter;
import com.utils.KafkaConfigUtil;
import org.apache.flink.api.common.functions.FilterFunction;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.common.restartstrategy.RestartStrategies;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.cep.*;
import org.apache.flink.cep.pattern.Pattern;
import org.apache.flink.cep.pattern.conditions.SimpleCondition;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.CheckpointConfig;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.functions.IngestionTimeExtractor;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import scala.Tuple6;
import java.util.List;
import java.util.Map;
import java.util.Properties;
public class Test2 {
private static Logger log = LoggerFactory.getLogger(Test2.class);
public static void main(String[] args) throws Exception {
/**
 * Flink configuration
 * */
StreamExecutionEnvironment env=StreamExecutionEnvironment.getExecutionEnvironment();
// env.getCheckpointConfig().setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE);
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime); // use event time
env.enableCheckpointing(1000); // critical: checkpointing must be enabled
env.getCheckpointConfig().enableExternalizedCheckpoints(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
env.getConfig().disableSysoutLogging(); // suppress sysout logging
env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(5, 10000));
// do not use the POJO's own timestamp (ingestion time is used instead)
final AssignerWithPeriodicWatermarks extractor =new IngestionTimeExtractor<AccessEvent>();
/**
 * Kafka configuration
 * */
Properties properties = KafkaConfigUtil.buildKafkaProps(); // Kafka parameter configuration
FlinkKafkaConsumer<String> consumer = new FlinkKafkaConsumer<>(KafkaConfigUtil.topic, new SimpleStringSchema(), properties);
DataStream<AccessEvent> accessEventDataStream=env.addSource(consumer)
.filter(new FilterFunction<String>() {
@Override
public boolean filter(String jsonVal) throws Exception {
return new JsonFilter().getJsonFilter(jsonVal);
}
}).map(new MapFunction<String, String>() {
@Override
public String map(String jsonvalue) throws Exception {
return new JsonFilter().dataMap(jsonvalue);
}
}).map(new MapFunction<String, Tuple6<Integer,Integer,String,Integer,String,String>>() {
//parse the JSON into a Tuple6
@Override
public Tuple6<Integer,Integer,String,Integer,String,String> map(String dataField) throws Exception {
return new JsonFilter().fieldMap(dataField);
}
}).map(new MapFunction<Tuple6<Integer, Integer, String, Integer, String, String>, AccessEvent>() {
//convert the Tuple6 into an AccessEvent
@Override
public AccessEvent map(Tuple6<Integer, Integer, String, Integer, String, String> tuple6) throws Exception {
return new JsonFilter().mapToAccessEvent(tuple6);
}
}).assignTimestampsAndWatermarks(extractor);
// accessEventDataStream.print();
//key the stream by employee_sys_no
// DataStream<AccessEvent> dataStreamKeyBy =accessEventDataStream.keyBy("employee_sys_no");
DataStream<AccessEvent> dataStreamKeyBy=accessEventDataStream.keyBy(AccessEvent::getEmployee_sys_no);
/**
 * Define the event pattern (Pattern)
 * */
Pattern<AccessEvent,AccessEvent> warningPattern=Pattern.<AccessEvent>begin("outdoor")
.where(new SimpleCondition<AccessEvent>() {
private static final long serialVersionUID = -6847788055093903603L;
@Override
public boolean filter(AccessEvent accessEvent) throws Exception {
return accessEvent.getDoor_status().equals("2");
}
})
.next("indoor").where(new SimpleCondition<AccessEvent>() {
@Override
public boolean filter(AccessEvent accessEvent) throws Exception {
return accessEvent.getDoor_status().equals("1");
}
})
.within(Time.seconds(10)).times(1); // choose the window as needed; set to 10s here for testing
PatternStream<AccessEvent> accessEventPatternStream=CEP.pattern(dataStreamKeyBy,warningPattern);
/**
 * Handle matched events
 * */
/**
Test: the simplest accessEventPatternStream.select (verified OK)
SingleOutputStreamOperator<AccessEvent> singleOutputStreamOperator=accessEventPatternStream.select(new PatternSelectFunction<AccessEvent,AccessEvent>() {
@Override
public AccessEvent select(Map<String, List<AccessEvent>> map) throws Exception {
return (AccessEvent) map.get("indoor").get(0);
}
});
singleOutputStreamOperator.print();
*/
/**
Set up the timeout output tag
*/
OutputTag<AccessEvent> outputTag=new OutputTag<AccessEvent>("timedout"){
private static final long serialVersionUID = 773503794597666247L;
};
SingleOutputStreamOperator<AccessEvent> timeout=accessEventPatternStream.flatSelect(
outputTag,
new AccessTimedOut(),
new FlatSelect()
);
//print the timed-out AccessEvents
timeout.getSideOutput(outputTag).print();
timeout.print();
env.execute(Test2.class.getSimpleName());
}
/**
 * Collect the timed-out events
 * */
public static class AccessTimedOut implements PatternFlatTimeoutFunction<AccessEvent,AccessEvent> {
private static final long serialVersionUID = -4214641891396057732L;
@Override
public void timeout(Map<String, List<AccessEvent>> pattern, long timeStamp, Collector<AccessEvent> out) throws Exception {
if (null!=pattern.get("outdoor")){
for (AccessEvent accessEvent:pattern.get("outdoor")){
System.out.println("timeout outdoor:"+accessEvent.getEmployee_sys_no());
out.collect(accessEvent);
}
}
//the match timed out before any "indoor" event arrived, so pattern.get("indoor") is null here
System.out.println("timeout end"+pattern.get("indoor"));
}
}
/**
 * Events that matched within the window (no timeout)
 * */
public static class FlatSelect implements PatternFlatSelectFunction<AccessEvent,AccessEvent> {
private static final long serialVersionUID = -3029589950677623844L;
@Override
public void flatSelect(Map<String, List<AccessEvent>> pattern, Collector<AccessEvent> collector) throws Exception {
System.out.println("flatSelect"+pattern.get("indoor"));
collector.collect(new AccessEvent());
}
}
}
2. Other Reference Cases
2.1 Case: Order Timeout Statistics
Problems with timeout statistics:
- Stream computation is message-triggered: with no arriving message there is nothing to compute.
- A timeout aggregate metric summarizes messages that have NOT arrived, e.g. "orders shipped more than 6 hours ago and still not picked up".

Solution idea: when no message arrives, manufacture one (for example with a dual-stream join).

Solution options:
- Based on a message queue
  - delayed reads
  - delayed dispatch (adds operational workload)
- Based on Flink State
- TimerService (a sketch of this approach follows the example below)
- CEP (regex-like matching over event sequences)

Example:
import com.alibaba.fastjson.JSON;
import com.alibaba.fastjson.JSONArray;
import com.alibaba.fastjson.JSONObject;
import com.suning.flink.function.MessageHandle;
import com.suning.flink.util.Configuration;
import org.apache.commons.lang.StringUtils;
import org.apache.flink.api.common.functions.MapFunction;
import org.apache.flink.api.java.functions.KeySelector;
import org.apache.flink.cep.CEP;
import org.apache.flink.cep.PatternSelectFunction;
import org.apache.flink.cep.PatternStream;
import org.apache.flink.cep.PatternTimeoutFunction;
import org.apache.flink.cep.pattern.Pattern;
import org.apache.flink.cep.pattern.conditions.IterativeCondition;
import org.apache.flink.cep.pattern.conditions.SimpleCondition;
import org.apache.flink.streaming.api.TimeCharacteristic;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.functions.AssignerWithPeriodicWatermarks;
import org.apache.flink.streaming.api.watermark.Watermark;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer08;
import org.apache.flink.streaming.util.serialization.SimpleStringSchema;
import org.apache.flink.util.OutputTag;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Properties;
/**
* @author 18074935 XU.MIN
* @date 2020/4/7 11:00
*/
public class LogisticsAbnormalCEPdeal {
public static void main(String[] args) throws Exception {
final long delay = 5 * 1000L;
final OutputTag<String> outputTag = new OutputTag<String>("side-output") {
};
final StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
env.setParallelism(2);
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
Properties orderProp = new Properties();
orderProp.setProperty("bootstrap.servers", Configuration.getString("order.bootstrap.servers"));
orderProp.setProperty("zookeeper.connect", Configuration.getString("order.zookeeper.connect"));
orderProp.setProperty("fetch.message.max.bytes", "10485760");
orderProp.setProperty("group.id", Configuration.getString("order.group.id"));
FlinkKafkaConsumer08<String> source = new FlinkKafkaConsumer08<>(Configuration.getString("dtm.order.links"), new SimpleStringSchema(), orderProp);
source.setStartFromLatest();
DataStream<Event> input = env
.addSource(source)
.filter(new MessageHandle.DataFilter())
.map((MapFunction<String, Event>) message -> {
Event event = new Event();
JSONObject pointMsg = JSON.parseObject(message);
String id = pointMsg.getString("id");
JSONObject data = pointMsg.getJSONObject("data");
String asomOrderItemId = data.getString("asomOrderItemId");
String srvType = data.getString("srv_type");
String serviceTime = data.getString("serviceTime");
event.setAsomOrderItemId(asomOrderItemId);
event.setServiceTime(serviceTime);
event.setSrvType(srvType);
event.setId(id);
return event;
});
KeyedStream<Event, String> watermark = input
.assignTimestampsAndWatermarks(
// new AscendingTimestampExtractor<Event>() {
// @Override
// public long extractAscendingTimestamp(Event event) {
// String serviceTime = event.getServiceTime();
// SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss SSS");
// long timestamp = 0L;
// try {
// Date date = format.parse(serviceTime);
// timestamp = date.getTime();
// } catch (ParseException e) {
// e.printStackTrace();
// }
// return timestamp;
// }
// }
new AssignerWithPeriodicWatermarks<Event>() {
private final long maxOutOfOrderness = delay;
private long currentMaxTimestamp = 0L;
@Override
public Watermark getCurrentWatermark() {
return new Watermark(currentMaxTimestamp - maxOutOfOrderness);
}
@Override
public long extractTimestamp(Event element, long previousElementTimestamp) {
String serviceTime = element.getServiceTime() + " 000";
SimpleDateFormat format = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss SSS");
long timestamp = 0L;
try {
Date date = format.parse(serviceTime);
timestamp = date.getTime();
currentMaxTimestamp = Math.max(timestamp, currentMaxTimestamp);
System.out.println("***" + format.format(getCurrentWatermark().getTimestamp()) + "****" + element.getId() + "***");
} catch (ParseException e) {
e.printStackTrace();
}
return timestamp;
}
})
.keyBy((KeySelector<Event, String>) Event::getAsomOrderItemId);
Pattern<Event, ?> pattern = Pattern.<Event>begin("start").where(
new SimpleCondition<Event>() {
@Override
public boolean filter(Event event) {
System.out.println("start" + event.getId());
return (StringUtils.equals(event.getSrvType(), "ZS04") && StringUtils.equals(event.getId(), "20000020"));
}
}
).followedBy("middle").where(
new SimpleCondition<Event>() {
@Override
public boolean filter(Event event) {
System.out.println("middle" + event.getId());
return (StringUtils.equals(event.getId(), "20000028") || StringUtils.equals(event.getId(), "20000029"));
}
}
).followedBy("end").where(
new IterativeCondition<Event>() {
@Override
public boolean filter(Event event, Context<Event> context) throws Exception {
if (StringUtils.equals(event.getId(), "20000031") || StringUtils.equals(event.getId(), "20000033")) {
System.out.println("end" + event.getId());
String finishTime = event.getServiceTime();
String changeTime = "";
for (Event e : context.getEventsForPattern("middle")) {
if (e.getServiceTime().compareTo(changeTime) > 0) {
changeTime = e.getServiceTime();
}
}
return finishTime.compareTo(changeTime) > 0;
}
return false;
}
}
).within(Time.minutes(30L));
watermark.map((MapFunction<Event, JSONObject>) event -> {
JSONObject result = new JSONObject();
result.put("id", event.getId());
result.put("asomOrderItemId", event.getAsomOrderItemId());
result.put("serviceTime", event.getServiceTime());
result.put("srvType", event.getSrvType());
result.put("第二次", "打印");
return result;
}).print();
PatternStream<Event> patternStream = CEP.pattern(watermark, pattern);
// DataStream<String> result = patternStream.process(
// new PatternProcessFunction<Event, String>() {
// @Override
// public void processMatch(
// Map<String, List<Event>> pattern,
// Context ctx,
// Collector<String> out) throws Exception {
//
//
// out.collect(pattern.toString());
// }
// });
SingleOutputStreamOperator<String> flatResult = patternStream.select(
outputTag,
new PatternTimeoutFunction<Event, String>() {
@Override
public String timeout(Map<String, List<Event>> map, long l) throws Exception {
JSONObject output = new JSONObject();
for (String key : map.keySet()) {
JSONArray collect = new JSONArray();
for (Event event : map.get(key)) {
collect.add(event.getId());
}
output.put(key, collect);
}
return output.toString();
}
},
new PatternSelectFunction<Event, String>() {
@Override
public String select(Map<String, List<Event>> map) throws Exception {
JSONObject output = new JSONObject();
for (String key : map.keySet()) {
JSONArray collect = new JSONArray();
for (Event event : map.get(key)) {
collect.add(event.getId());
}
output.put(key, collect);
}
return output.toString();
}
}
);
DataStream<String> timeoutFlatResult = flatResult.getSideOutput(outputTag);
timeoutFlatResult.print();
flatResult.print();
env.execute("Flink CEP");
}
}
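As mentioned in the solution list for this case, the same timeout can be detected without CEP by using TimerService in a KeyedProcessFunction: register an event-time timer when the "shipped" event arrives and delete it when "picked up" arrives; if the timer fires, the order timed out. The sketch below is illustrative only; the OrderEvent shape, status values, and 6-hour threshold are assumptions, not code from this project.
import org.apache.flink.api.common.state.ValueState;
import org.apache.flink.api.common.state.ValueStateDescriptor;
import org.apache.flink.configuration.Configuration;
import org.apache.flink.streaming.api.functions.KeyedProcessFunction;
import org.apache.flink.util.Collector;

// Illustrative event shape (an assumption for this sketch)
class OrderEvent {
    public String orderId;
    public String status; // e.g. "SHIPPED", "PICKED_UP"
}

// Sketch: timeout detection with TimerService instead of CEP,
// keyed by orderId, assuming event-time timestamps are assigned upstream.
public class ShipTimeoutFunction extends KeyedProcessFunction<String, OrderEvent, String> {
    private static final long TIMEOUT_MS = 6 * 60 * 60 * 1000L; // assumed 6-hour threshold

    private transient ValueState<Long> timerState; // timestamp of the pending timer, if any

    @Override
    public void open(Configuration parameters) {
        timerState = getRuntimeContext().getState(
                new ValueStateDescriptor<>("ship-timer", Long.class));
    }

    @Override
    public void processElement(OrderEvent event, Context ctx, Collector<String> out)
            throws Exception {
        if ("SHIPPED".equals(event.status)) {
            // manufacture the "missing message": a timer that fires if nothing else arrives
            long timerTs = ctx.timestamp() + TIMEOUT_MS;
            ctx.timerService().registerEventTimeTimer(timerTs);
            timerState.update(timerTs);
        } else if ("PICKED_UP".equals(event.status) && timerState.value() != null) {
            // terminal event arrived in time: cancel the pending timer
            ctx.timerService().deleteEventTimeTimer(timerState.value());
            timerState.clear();
        }
    }

    @Override
    public void onTimer(long timestamp, OnTimerContext ctx, Collector<String> out)
            throws Exception {
        // the timer was never cancelled: no pick-up within the threshold
        out.collect("timeout warning, order " + ctx.getCurrentKey());
        timerState.clear();
    }
}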
2.2 Case: Air Quality Monitoring with CEP
Suppose a data stream continuously receives air-quality readings from different locations. If a location's readings cross the thresholds twice in a row (above 6 and then 7, or below 3 and then 2), its air quality is considered abnormal and a warning record is written. The warning record is then processed further: if the absolute difference between the two samples is less than 2, it is classified as "quality exceeded", otherwise as "abnormal fluctuation".
The code flow is as follows:
First, define the air-quality object, with an ID, city, air-quality reading, record time, and timestamp.
public class AirQualityRecoder implements Serializable{
private String id;
private String city;
private Integer airQuality;
private Date emmit;
private Long et;
}
Before walking through the CEP logic, define two POJOs for holding the data.
//One holds the before/after pair of records being compared
public class AirWarningRecoder{
private String city;
private AirQualityRecoder first;
private AirQualityRecoder second;
public String getCity() {
return city;
}
public void setCity(String city) {
this.city = city;
}
public AirQualityRecoder getFirst() {
return first;
}
public void setFirst(AirQualityRecoder first) {
this.first = first;
}
public AirQualityRecoder getSecond() {
return second;
}
public void setSecond(AirQualityRecoder second) {
this.second = second;
}
public AirWarningRecoder(AirQualityRecoder first, AirQualityRecoder second) {
this.first = first;
this.second = second;
}
@Override
public String toString() {
return "AirWarningRecoder{" +
"city='" + city + '\'' +
", first=" + first +
", second=" + second +
'}';
}
public AirWarningRecoder(String city, AirQualityRecoder first, AirQualityRecoder second) {
this.city = city;
this.first = first;
this.second = second;
}
}
//The other holds the warning type
public class AirWarningTypeRecoder{
private String city;
private String wtype;
private Integer first;
private Integer second;
@Override
public String toString() {
return "AirWarningTypeRecoder{" +
"city='" + city + '\'' +
", wtype='" + wtype + '\'' +
", first=" + first +
", second=" + second +
'}';
}
public Integer getFirst() {
return first;
}
public void setFirst(Integer first) {
this.first = first;
}
public Integer getSecond() {
return second;
}
public void setSecond(Integer second) {
this.second = second;
}
public String getCity() {
return city;
}
public void setCity(String city) {
this.city = city;
}
public String getWtype() {
return wtype;
}
public void setWtype(String wtype) {
this.wtype = wtype;
}
}
/**
Now the concrete CEP flow. First define a Pattern that identifies warning data; the second Pattern performs no filtering and simply hands the data to the next processing step.
Then register the patterns and streams with CEP, and run select and map over the results.
*/
public class CepApp{
public static void main(String[] args) throws Exception {
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
Map<String, String> properties = new HashMap<>();
properties.put("bootstrap.servers", "localhost:9092");
properties.put("group.id", "test");
properties.put("enable.auto.commit", "true");
properties.put("auto.commit.interval.ms", "1000");
properties.put("auto.offset.reset", "earliest");
properties.put("session.timeout.ms", "30000");
// properties.put("key.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
// properties.put("value.deserializer", "org.apache.kafka.common.serialization.StringDeserializer");
properties.put("topic", "test1");
ParameterTool parameterTool = ParameterTool.fromMap(properties);
FlinkKafkaConsumer010 consumer010 = new FlinkKafkaConsumer010(
parameterTool.getRequired("topic"), new WriteIntoKafka.SimpleAirQualityRecoderSchema(), parameterTool.getProperties());
DataStream<AirQualityRecoder> aqrStream = env.addSource(consumer010);
//data source defined; now build the pattern
Pattern<AirQualityRecoder,?> warningPattern=Pattern.<AirQualityRecoder>begin("first")
.subtype(AirQualityRecoder.class)
.where(new IterativeCondition<AirQualityRecoder>(){
@Override
public boolean filter(AirQualityRecoder value,Context<AirQualityRecoder> ctx) throws Exception{
return value.getAirQuality()>=6;
}
}).or(new IterativeCondition<AirQualityRecoder>(){
@Override
public boolean filter(AirQualityRecoder value,Context<AirQualityRecoder> ctx) throws Exception{
return value.getAirQuality()<=3;
}
})
.next("second")
.where(new IterativeCondition<AirQualityRecoder>(){
@Override
public boolean filter(AirQualityRecoder value,Context<AirQualityRecoder> ctx) throws Exception{
return value.getAirQuality()<=2;
}
})
.within(Time.seconds(60));
PatternStream<AirQualityRecoder> warningPatternStream=CEP.pattern(
aqrStream.keyBy("city"), // key by city
warningPattern);
DataStream<AirWarningRecoder> warnings=warningPatternStream.select(
(Map<String,List<AirQualityRecoder>> pattern) ->{
AirQualityRecoder first=(AirQualityRecoder) pattern.get("first").get(0);
AirQualityRecoder second=(AirQualityRecoder) pattern.get("second").get(0);
return new AirWarningRecoder(first.getCity(),first,second);
}
);
Pattern<AirWarningRecoder,?> typePattern=Pattern.<AirWarningRecoder>begin("pass")
.subtype(AirWarningRecoder.class);
PatternStream<AirWarningRecoder> typePatternStream=CEP.pattern(
warnings.keyBy(AirWarningRecoder::getCity),
typePattern
);
DataStream<AirWarningTypeRecoder> awt=typePatternStream.select(
(Map<String,List<AirWarningRecoder>> pattern) ->{
AirWarningRecoder awr=(AirWarningRecoder) pattern.get("pass").get(0);
AirWarningTypeRecoder awtr=new AirWarningTypeRecoder();
awtr.setCity(awr.getCity());
awtr.setFirst(awr.getFirst().getAirQuality());
awtr.setSecond(awr.getSecond().getAirQuality());
//compare the two readings, not the records themselves
int res=Math.abs(awtr.getFirst()-awtr.getSecond());
if(res<=2){
awtr.setWtype("quality exceeded");
}else{
awtr.setWtype("abnormal fluctuation");
}
return awtr;
}
);
warnings.print();
awt.print();
env.execute("cep run!!!");
}
}
2.3 Case: Timeout State Monitoring with Flink CEP
/**
CEP: Complex Event Processing.
Typical timeout questions:
How long after an order is placed has payment still not been confirmed?
How long after a ride-hailing order is created has pickup still not been confirmed?
How far past the promised delivery time is a food order still not confirmed as delivered?
*/
/**
Taking orders as an example:
1. After initialization, an order may stay "processing" for a long time (possibly due to network issues, slow third-party processing, etc.); if no terminal state appears within a set time after the initial state, raise an alert.
2. initialized 02 ---> terminal state, success: 00
              ---> terminal state, failure: 01
              ---> once the time is up with neither success nor failure detected, alert
3. Note: do not rely on the data's own timestamps and watermarks to drive event time forward.
*/
Flink CEP implementation steps
- 1.IN: DataSource -> DataStream -> Transformations -> DataStream
- 2.Pattern: Pattern.begin.where.next.where…times…
- 3.PatternStream: CEP.pattern(DataStream, Pattern)
- 4.DataStream: PatternStream.select(PatternSelectFunction) / PatternStream.flatSelect(PatternFlatSelectFunction)
- 5.OUT: DataStream -> Transformations -> DataStream -> DataSink
Flink CEP timeout-matching implementation steps
The stream fed into timeout CEP must be keyed (a KeyedStream); if the input is not a KeyedStream, Flink creates a zero-byte key:
KeySelector<IN, Byte> keySelector = new NullByteKeySelector<>();
The Pattern must end with a within(...) call to set the window time.
If the stream is keyed by a primary key, at most one timeout event can be matched per time window, so PatternStream.select(…) is sufficient.
- 1.IN: DataSource -> DataStream -> Transformations -> DataStream -> keyBy ->KeyedStream
- 2.Pattern:Pattern.begin.where.next.where…within(Time windowTime)
- 3.PatternStream:CEP.pattern(KeyedStream,Pattern)
- 4.OutputTag:new OutputTag(…)
- 5.SingleOutputStreamOperator: PatternStream.flatSelect(OutputTag,PatternFlatTimeoutFunction,PatternFlatSelectFunction)
- 6.DataStream:SingleOutputStreamOperator.getSideOutput(OutputTag)
- 7.OUT:DataStream -> Transformations -> DataStream -> DataSink
Limitations of Flink CEP timeouts
As with Flink window aggregation, when using event time with watermarks derived from the events themselves, a subsequent event must arrive before the window is triggered to compute and emit results.
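One common mitigation, roughly what the IngestionTimeExtractor used in the demo below does, is to drive the watermark from the wall clock so that event time keeps advancing even when the source goes quiet. A minimal sketch (the 5-second tolerance is an assumed value):
// Sketch: a periodic watermark assigner driven by the system clock,
// so that within(...) timeouts can fire even with no further events.
public static class ClockDrivenWatermarks implements AssignerWithPeriodicWatermarks<POJO> {
    private static final long MAX_DELAY_MS = 5_000L; // assumed tolerance

    @Override
    public Watermark getCurrentWatermark() {
        // advance with processing time instead of waiting for the next event
        return new Watermark(System.currentTimeMillis() - MAX_DELAY_MS);
    }

    @Override
    public long extractTimestamp(POJO element, long previousElementTimestamp) {
        // stamp elements with arrival time (ingestion-time semantics)
        return System.currentTimeMillis();
    }
}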
Complete Demo
public class CEPTimeoutEventJob{
private static final String LOCAL_KAFKA_BROKER = "localhost:9092";
private static final String GROUP_ID = CEPTimeoutEventJob.class.getSimpleName();
private static final String GROUP_TOPIC = GROUP_ID;
public static void main(String[] args) throws Exception{
// arguments
ParameterTool params = ParameterTool.fromArgs(args);
StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
// use event time
env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime);
env.enableCheckpointing(5000);
env.getCheckpointConfig().enableExternalizedCheckpoints
(CheckpointConfig.ExternalizedCheckpointCleanup.RETAIN_ON_CANCELLATION);
env.getConfig().disableSysoutLogging();
env.getConfig().setRestartStrategy(RestartStrategies.fixedDelayRestart(5,10000));
// do not use the POJO's own timestamp
final AssignerWithPeriodicWatermarks extractor=new IngestionTimeExtractor<POJO>();
// match the parallelism to the number of partitions of the Kafka topic
env.setParallelism(3);
Properties kafkaProps = new Properties();
kafkaProps.setProperty("bootstrap.servers", LOCAL_KAFKA_BROKER);
kafkaProps.setProperty("group.id", GROUP_ID);
// consume messages from Kafka
FlinkKafkaConsumer011<POJO> consumer =
new FlinkKafkaConsumer011<>(GROUP_TOPIC, new POJOSchema(), kafkaProps);
DataStream<POJO> pojoDataStream = env.addSource(consumer)
.assignTimestampsAndWatermarks(extractor);
pojoDataStream.print();
//key by primary id, i.e. run the timeout check per POJO [different POJO types could use different within times]
//1.
DataStream<POJO> keyedPojos=pojoDataStream.keyBy("aid");
//a complete POJO event sequence, from initialization to terminal state
//2.
Pattern<POJO,POJO> completedPojo=Pattern.<POJO>begin("init")
.where(new SimpleCondition<POJO>(){
private static final long serialVersionUID = -6847788055093903603L;
@Override
public boolean filter(POJO pojo) throws Exception{
return "02".equals(pojo.getAstatus());//只筛选状态为02的数据(初始化)
}
})
.followedBy("end")
.where(new SimpleCondition<POJO>(){
private static final long serialVersionUID = -2655089736460847552L;
@Override
public boolean filter(POJO pojo) throws Exception{
//keep records whose status is success (00) or failure (01)
return "00".equals(pojo.getAstatus())||"01".equals(pojo.getAstatus());
}
});
//find the aid of events that have not reached a terminal state within one minute [short, for testing]
//if different types need different within times (say 1 minute vs 1 hour), build multiple PatternStreams
PatternStream<POJO> patternStream=CEP.pattern(keyedPojos,completedPojo.within(Time.minutes(1)));
//define the side output for timeouts
//4.
OutputTag<POJO> timedout=new OutputTag<POJO>("timedout"){
private static final long serialVersionUID = 773503794597666247L;
};
// OutputTag<L> timeoutOutputTag,
//PatternFlatTimeoutFunction<T, L> patternFlatTimeoutFunction,
//PatternFlatSelectFunction<T, R> patternFlatSelectFunction
//5.
SingleOutputStreamOperator<POJO> timeoutPojos=patternStream.flatSelect(
timedout,
new POJOTimedOut(),
new FlatSelectNothing()
);
//print the timed-out POJOs
//6,7
timeoutPojos.getSideOutput(timedout).print();
timeoutPojos.print();
env.execute(CEPTimeoutEventJob.class.getSimpleName());
}
/**
Collect the timed-out events
*/
public static class POJOTimedOut implements PatternFlatTimeoutFunction<POJO,POJO>{
private static final long serialVersionUID = -4214641891396057732L;
@Override
public void timeout(Map<String,List<POJO>> pattern,long timeStamp,Collector<POJO> out) throws Exception{
if(null != pattern.get("init")){
for(POJO pojoInit:pattern.get("init")){
System.out.println("timeout init:"+pojoInit.getAid());
out.collect(pojoInit);
}
}
//the match timed out before any end event arrived, so pattern.get("end") is null here
System.out.println("timeout end"+pattern.get("end"));
}
}
/**
Usually does nothing, but it could also forward all matched events downstream; with relaxed contiguity, skipped (passed-over) events cannot be selected and forwarded.
These are the sequences that completed init and end within the one-minute window.
*/
public static class FlatSelectNothing<T> implements PatternFlatSelectFunction<T,T>{
private static final long serialVersionUID = -3029589950677623844L;
@Override
public void flatSelect(Map<String,List<T>> pattern,Collector<T> collector){
System.out.println("flatSelect"+pattern);
}
}
}