import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.cep.CEP;
import org.apache.flink.cep.PatternStream;
import org.apache.flink.cep.functions.PatternProcessFunction;
import org.apache.flink.cep.functions.TimedOutPartialMatchHandler;
import org.apache.flink.cep.pattern.Pattern;
import org.apache.flink.cep.pattern.conditions.SimpleCondition;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import java.time.Duration;
import java.util.List;
import java.util.Map;
/**
 * Flink CEP demo that separates paid orders from orders whose payment never arrived.
 *
 * <p>The job reads a fixed, in-memory list of {@code LoginEvent}s, keys them by
 * {@code ipAddress} and looks for a "create" event followed by a "pay" event within
 * 15 minutes. Complete matches are printed on the main output; partial matches that
 * time out are routed to a side output.
 *
 * <p>NOTE(review): in the sample data the value read through {@code ipAddress} looks like an
 * order id ("order_1", ...); {@code LoginEvent} is declared elsewhere, so confirm the field
 * mapping there.
 *
 * @author jiasongfan
 * @date 2022/7/26
 * @apiNote Demo job only; the input is hard-coded in {@link #main(String[])}.
 */
public class CEPTest05 {

    /**
     * Side-output tag for "create" events that were not followed by a "pay" in time.
     * Declared once so that the job wiring and the timeout handler cannot drift apart:
     * the side output is only readable when both sides use the same tag.
     */
    private static final OutputTag<String> UNPAID_TAG = new OutputTag<String>("late_data") {
    };

    /**
     * Builds and runs the demo pipeline.
     *
     * @param args unused
     * @throws Exception if the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Sample events with event-time timestamps taken from LoginEvent.timestamp.
        SingleOutputStreamOperator<LoginEvent> timeDS = env.fromElements(
                new LoginEvent("user_1", "order_1", "create", 1000L),
                new LoginEvent("user_2", "order_2", "create", 2000L),
                new LoginEvent("user_1", "order_1", "modify", 10 * 1000L),
                new LoginEvent("user_1", "order_1", "pay", 60 * 1000L),
                new LoginEvent("user_2", "order_3", "create", 10 * 60 * 1000L),
                new LoginEvent("user_2", "order_3", "pay", 20 * 60 * 1000L),
                new LoginEvent("user_2", "order_2", "create", 30 * 60 * 1000L)
        ).assignTimestampsAndWatermarks(WatermarkStrategy
                .<LoginEvent>forBoundedOutOfOrderness(Duration.ofSeconds(0))
                .withTimestampAssigner((event, timestamp) -> event.timestamp));

        KeyedStream<LoginEvent, String> keyDS = timeDS.keyBy(data -> data.ipAddress);

        // "create" followed (not necessarily immediately) by "pay", within 15 minutes.
        // Constant-first equals() keeps a null eventType from throwing inside the operator.
        Pattern<LoginEvent, LoginEvent> pattern = Pattern.<LoginEvent>begin("create").where(
                new SimpleCondition<LoginEvent>() {
                    @Override
                    public boolean filter(LoginEvent event) {
                        return "create".equals(event.eventType);
                    }
                }
        ).followedBy("pay").where(
                new SimpleCondition<LoginEvent>() {
                    @Override
                    public boolean filter(LoginEvent subEvent) {
                        return "pay".equals(subEvent.eventType);
                    }
                }
        ).within(Time.minutes(15));

        PatternStream<LoginEvent> patternStream = CEP.pattern(keyDS, pattern);

        SingleOutputStreamOperator<String> process = patternStream.process(new MyPro2());
        process.print("正常");
        process.getSideOutput(UNPAID_TAG).print("未支付");

        env.execute();
    }

    /**
     * Emits one message per complete create/pay match and sends timed-out partial
     * matches to the {@code "late_data"} side output.
     */
    public static class MyPro2 extends PatternProcessFunction<LoginEvent, String>
            implements TimedOutPartialMatchHandler<LoginEvent> {

        /**
         * Handles a complete match by emitting the key of the "pay" event plus a "paid" suffix.
         *
         * @param map       matched events by pattern name
         * @param context   function context (unused)
         * @param collector receives the result message
         */
        @Override
        public void processMatch(Map<String, List<LoginEvent>> map, Context context, Collector<String> collector) throws Exception {
            LoginEvent pay = map.get("pay").get(0);
            collector.collect(pay.ipAddress + "已支付");
        }

        /**
         * Handles a partial match that timed out, i.e. a "create" with no "pay" in time.
         *
         * @param map     events matched so far, by pattern name
         * @param context used to write to the side output
         */
        @Override
        public void processTimedOutMatch(Map<String, List<LoginEvent>> map, Context context) throws Exception {
            LoginEvent create = map.get("create").get(0);
            context.output(UNPAID_TAG, create.ipAddress + "未支付");
        }
    }
}
// Example: detecting consecutive login failures (获取连续登录失败)
/**
* @author jiasongfan
* @date 2022/7/26
* @apiNote
*/
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.cep.CEP;
import org.apache.flink.cep.PatternSelectFunction;
import org.apache.flink.cep.PatternStream;
import org.apache.flink.cep.pattern.Pattern;
import org.apache.flink.cep.pattern.conditions.SimpleCondition;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import java.time.Duration;
import java.util.List;
import java.util.Map;
/**
 * Flink CEP demo that raises an alert when a user fails to log in three times in a row.
 *
 * <p>The job reads a fixed, in-memory list of {@code LoginEvent}s, keys them by
 * {@code userId} and matches three consecutive "fail" events per user.
 */
public class CEPTest {

    /**
     * Builds and runs the demo pipeline.
     *
     * @param args unused
     * @throws Exception if the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        env.setParallelism(1);

        // Sample events; event time comes from LoginEvent.timestamp, with 20 s of
        // allowed out-of-orderness.
        SingleOutputStreamOperator<LoginEvent> timeDS = env.fromElements(
                new LoginEvent("user_1", "192.168.0.1", "fail", 2000L),
                new LoginEvent("user_1", "192.168.0.2", "fail", 3000L),
                new LoginEvent("user_2", "192.168.1.29", "fail", 4000L),
                new LoginEvent("user_1", "172.56.23.10", "fail", 5000L),
                new LoginEvent("user_2", "192.168.1.29", "success", 6000L),
                new LoginEvent("user_2", "192.168.1.29", "fail", 7000L),
                new LoginEvent("user_2", "192.168.1.29", "fail", 8000L)
        ).assignTimestampsAndWatermarks(WatermarkStrategy
                .<LoginEvent>forBoundedOutOfOrderness(Duration.ofSeconds(20))
                .withTimestampAssigner((event, timestamp) -> event.timestamp));

        KeyedStream<LoginEvent, String> keyDS = timeDS.keyBy(data -> data.userId);

        // Three failed logins in a row. times(3) on its own uses relaxed contiguity, which
        // would also accept failures separated by a successful login; consecutive()
        // enforces strict contiguity between the three events.
        Pattern<LoginEvent, LoginEvent> pattern = Pattern.<LoginEvent>begin("first").where(
                new SimpleCondition<LoginEvent>() {
                    @Override
                    public boolean filter(LoginEvent event) {
                        // Constant-first equals() tolerates a null eventType.
                        return "fail".equals(event.eventType);
                    }
                }
        ).times(3).consecutive();

        PatternStream<LoginEvent> patternStream = CEP.pattern(keyDS, pattern);

        // Turn every detected match into an alert message.
        SingleOutputStreamOperator<String> select = patternStream.select(new PatternSelectFunction<LoginEvent, String>() {
            @Override
            public String select(Map<String, List<LoginEvent>> map) throws Exception {
                // All three matched events are stored under the single pattern name "first".
                List<LoginEvent> failures = map.get("first");
                LoginEvent first = failures.get(0);
                LoginEvent second = failures.get(1);
                LoginEvent third = failures.get(2);
                return first.userId + "连续三次登录失败,登录时间"
                        + first.timestamp + ","
                        + second.timestamp + ","
                        + third.timestamp;
            }
        });
        select.print();

        // NOTE(review): the job name looks like a copy-paste leftover; kept unchanged in case
        // anything looks the job up by name.
        env.execute("Window WordCount");
    }
}
// Example: CEP over JSON page-log events (cep json)
/**
* @author jiasongfan
* @date 2022/7/29
* @apiNote
*/
import com.alibaba.fastjson.JSONObject;
import jdk.nashorn.internal.scripts.JO;
import org.apache.flink.api.common.eventtime.WatermarkStrategy;
import org.apache.flink.api.common.functions.RichMapFunction;
import org.apache.flink.api.common.serialization.SimpleStringSchema;
import org.apache.flink.cep.CEP;
import org.apache.flink.cep.PatternSelectFunction;
import org.apache.flink.cep.PatternStream;
import org.apache.flink.cep.PatternTimeoutFunction;
import org.apache.flink.cep.functions.PatternProcessFunction;
import org.apache.flink.cep.functions.TimedOutPartialMatchHandler;
import org.apache.flink.cep.pattern.Pattern;
import org.apache.flink.cep.pattern.conditions.SimpleCondition;
import org.apache.flink.streaming.api.datastream.DataStream;
import org.apache.flink.streaming.api.datastream.KeyedStream;
import org.apache.flink.streaming.api.datastream.SingleOutputStreamOperator;
import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
import org.apache.flink.streaming.api.windowing.time.Time;
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer;
import org.apache.flink.util.Collector;
import org.apache.flink.util.OutputTag;
import java.time.Duration;
import java.util.List;
import java.util.Map;
import java.util.Properties;
/**
 * Flink CEP demo that flags "bounce" visitors in page-log JSON records.
 *
 * <p>Records are keyed by {@code common.mid}. A record whose {@code page.last_page_id} is
 * missing or empty starts the pattern; if it is not immediately followed by another page
 * record for the same key within 10 seconds, the partial match times out and the starting
 * record is written to a side output.
 */
public class Test01 {

    /**
     * Side-output tag for timed-out partial matches. Declared once so that the job wiring
     * and {@link MyCERPro} always use the same tag.
     */
    private static final OutputTag<String> BOUNCE_TAG = new OutputTag<String>("late-data") {
    };

    /**
     * Builds and runs the demo pipeline.
     *
     * @param args unused
     * @throws Exception if the Flink job fails
     */
    public static void main(String[] args) throws Exception {
        // Steps 2-3: project setup and stream execution environment.
        StreamExecutionEnvironment env = StreamExecutionEnvironment.getExecutionEnvironment();
        // Step 4: parallelism 1.
        env.setParallelism(1);

        // Step 5: Kafka source for topic dwd_page_log.
        // NOTE(review): `stream` is never consumed below; the pipeline runs on the in-memory
        // sample data instead. Swap `dataStream` for `stream` to read from Kafka.
        Properties properties = new Properties();
        properties.setProperty("bootstrap.servers", "hdp1:9092");
        properties.setProperty("group.id", "test");
        DataStream<String> stream = env
                .addSource(new FlinkKafkaConsumer<>("dwd_page_log", new SimpleStringSchema(), properties));

        // Sample data used in place of the Kafka topic.
        DataStream<String> dataStream = env
                .fromElements(
                        "{\"common\":{\"mid\":\"101\"},\"page\":{\"page_id\":\"home\"},\"ts\":10000} ",
                        "{\"common\":{\"mid\":\"102\"},\"page\":{\"page_id\":\"home\"},\"ts\":12000}",
                        "{\"common\":{\"mid\":\"102\"},\"page\":{\"page_id\":\"good_list\",\"last_page_id\":" +
                                "\"home\"},\"ts\":15000} ",
                        "{\"common\":{\"mid\":\"102\"},\"page\":{\"page_id\":\"good_list\",\"last_page_id\":" +
                                "\"detail\"},\"ts\":30000}"
                );

        // Step 6: parse each line into a JSON object and use its "ts" field as event time.
        SingleOutputStreamOperator<JSONObject> map = dataStream.map(data -> JSONObject.parseObject(data));
        SingleOutputStreamOperator<JSONObject> timeDS = map.assignTimestampsAndWatermarks(WatermarkStrategy
                .<JSONObject>forBoundedOutOfOrderness(Duration.ofSeconds(0))
                .withTimestampAssigner((event, timestamp) -> event.getLong("ts")));

        // Group by device id.
        KeyedStream<JSONObject, String> keyDS = timeDS.keyBy(data -> data.getJSONObject("common").getString("mid"));

        // Steps 7-9: "start" = record without last_page_id, immediately followed by any
        // record that has a page_id, within 10 seconds.
        Pattern<JSONObject, JSONObject> pattern = Pattern.<JSONObject>begin("start").where(
                new SimpleCondition<JSONObject>() {
                    @Override
                    public boolean filter(JSONObject event) {
                        JSONObject page = event.getJSONObject("page");
                        if (page == null) {
                            // Not a page record; it cannot start a visit.
                            return false;
                        }
                        String lastPageId = page.getString("last_page_id");
                        return lastPageId == null || lastPageId.isEmpty();
                    }
                }
        ).next("next").where(
                new SimpleCondition<JSONObject>() {
                    @Override
                    public boolean filter(JSONObject subEvent) {
                        JSONObject page = subEvent.getJSONObject("page");
                        if (page == null) {
                            return false;
                        }
                        String pageId = page.getString("page_id");
                        return pageId != null && !pageId.isEmpty();
                    }
                }
        ).within(Time.seconds(10));

        // Step 10: apply the pattern to the keyed stream.
        PatternStream<JSONObject> patternStream = CEP.pattern(keyDS, pattern);

        // Only the timed-out partial matches (the side output) are printed.
        SingleOutputStreamOperator<String> process = patternStream.process(new MyCERPro());
        process.getSideOutput(BOUNCE_TAG).print("跳出用户");

        // NOTE(review): the job name looks like a copy-paste leftover; kept unchanged in case
        // anything looks the job up by name.
        env.execute("Window WordCount");
    }

    /**
     * Ignores complete matches and forwards timed-out partial matches to the
     * {@code "late-data"} side output.
     */
    public static class MyCERPro extends PatternProcessFunction<JSONObject, String>
            implements TimedOutPartialMatchHandler<JSONObject> {

        /**
         * Complete matches produce no output.
         *
         * @param map       matched events by pattern name (unused)
         * @param context   function context (unused)
         * @param collector main output (nothing is emitted)
         */
        @Override
        public void processMatch(Map<String, List<JSONObject>> map, Context context, Collector<String> collector) throws Exception {
            // Intentionally empty.
        }

        /**
         * Writes the "start" record of a timed-out partial match to the side output.
         *
         * @param map     events matched so far, by pattern name
         * @param context used to write to the side output
         */
        @Override
        public void processTimedOutMatch(Map<String, List<JSONObject>> map, Context context) throws Exception {
            JSONObject start = map.get("start").get(0);
            context.output(BOUNCE_TAG, start + "跳出用户");
        }
    }
}