一、简介
Flink中的CEP(Complex Event Processing,复杂事件处理)是用于处理复杂事件流的库,可以在流式数据中自动检测并匹配符合特定模式的事件序列。比如,一个用户在1 min内连续登录失败的次数超过2次,则将该用户的登录行为判定为恶意登录。
本篇是CEP编程入门的第一篇
二、Maven依赖
<properties>
    <!-- Scala binary version suffix appended to the Flink artifact ids below. -->
    <scala.binary.version>2.11</scala.binary.version>
    <!-- Single place to pin the Flink release used by every Flink artifact. -->
    <flink.version>1.10.0</flink.version>
</properties>

<dependencies>
    <!-- CEP core library. -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-cep_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
    <!-- Scala API wrapper around the CEP library. -->
    <!-- NOTE(review): only the CEP artifacts are listed here; the core Flink
         streaming dependencies are presumably declared elsewhere. Confirm. -->
    <dependency>
        <groupId>org.apache.flink</groupId>
        <artifactId>flink-cep-scala_${scala.binary.version}</artifactId>
        <version>${flink.version}</version>
    </dependency>
</dependencies>
三、代码示例
/**
 * Input record for the login-failure detection example.
 *
 * @param userId    id of the user attempting to log in; the stream is keyed by this field
 * @param ip        source IP address of the login attempt
 * @param eventType outcome of the attempt; the example compares it against "fail"
 * @param eventTime event timestamp; the job multiplies it by 1000 before using it as
 *                  the Flink event-time timestamp, i.e. it is treated as seconds
 */
case class LoginEvent(
  userId: Long,
  ip: String,
  eventType: String,
  eventTime: Long
)
/**
 * Flink CEP example: reports a login failure that is immediately followed by
 * another login failure of the same user within 2 seconds.
 *
 * The listing omits its imports. To compile it needs (Flink 1.10, Scala API):
 *   org.apache.flink.cep.scala.CEP
 *   org.apache.flink.cep.scala.pattern.Pattern
 *   org.apache.flink.streaming.api.TimeCharacteristic
 *   org.apache.flink.streaming.api.scala._
 *   org.apache.flink.streaming.api.windowing.time.Time
 */
object LoginFailWithCep {

  /**
   * Builds and runs the job: in-memory source -> event-time timestamps ->
   * keyBy(userId) -> CEP pattern -> print.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // The `within` window below is evaluated against event time, not wall-clock time.
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.setParallelism(1)

    // Load the data source: a fixed, already time-ordered list of login events.
    val keyedLogins = env
      .fromCollection(List(
        LoginEvent(1, "192.168.0.1", "fail", 1558430832),
        LoginEvent(1, "192.168.0.2", "fail", 1558430843),
        LoginEvent(1, "192.168.0.3", "fail", 1558430844),
        LoginEvent(2, "192.168.10.10", "success", 1558430845),
        LoginEvent(2, "192.168.10.11", "fail", 1558430846),
        LoginEvent(2, "192.168.10.12", "fail", 1558430847)
      ))
      // eventTime is in seconds; Flink timestamps are in milliseconds.
      .assignAscendingTimestamps(_.eventTime * 1000)
      // Key by user so that the pattern is matched per user.
      .keyBy(_.userId)

    // Other ways to define the pattern and to process matches (kept for reference):
    //
    // Pattern.begin[LoginEvent]("start")
    //   .where(new IterativeCondition[LoginEvent] {
    //     override def filter(t: LoginEvent, context: IterativeCondition.Context[LoginEvent]): Boolean = t.eventType == "fail"
    //   })
    //
    // val pattern: Pattern[LoginEvent, LoginEvent] = Pattern.begin[LoginEvent]("start")
    //   .where(new SimpleCondition[LoginEvent] {
    //     override def filter(value: LoginEvent): Boolean = value.eventType == "fail"
    //   })
    //   .next("next")
    //   .where(new SimpleCondition[LoginEvent] {
    //     override def filter(value: LoginEvent): Boolean = value.eventType == "fail"
    //   })
    //   .times(2)
    //   .within(Time.seconds(2))
    //
    // val patternStream: PatternStream[LoginEvent] = CEP.pattern(dataStream, pattern)
    // val value: DataStream[(String, String, String)] = patternStream.process(new PatternProcessFunction[LoginEvent, (String, String, String)] {
    //   override def processMatch(map: util.Map[String, util.List[LoginEvent]], context: PatternProcessFunction.Context, collector: Collector[(String, String, String)]): Unit = {
    //     // Look the events up by the name of the pattern stage they matched.
    //     val iter: util.Iterator[LoginEvent] = map.get("next").iterator()
    //     while (iter.hasNext) {
    //       val loginEvent: LoginEvent = iter.next()
    //       collector.collect((loginEvent.ip, loginEvent.userId.toString, loginEvent.eventType))
    //     }
    //   }
    // })

    // Pattern: a "fail" event strictly followed (next = no event in between)
    // by another "fail" event, both within 2 seconds.
    val loginFailPattern = Pattern
      .begin[LoginEvent]("begin")
      .where(_.eventType == "fail")
      .next("next")
      .where(_.eventType == "fail")
      // .times(2) // would require the "next" stage to match twice
      .within(Time.seconds(2))

    // Apply the pattern to the keyed stream to obtain the pattern stream.
    val loginPatternStream = CEP.pattern(keyedLogins, loginFailPattern)

    // The Scala CEP select function receives a scala.collection.Map keyed by stage name.
    import scala.collection.Map
    val logFailDataStream = loginPatternStream.select(
      (pattern: Map[String, Iterable[LoginEvent]]) => {
        // The first failure of the pair is available under the "begin" stage:
        // val firstFail = pattern("begin").head
        //
        // Look the stage up directly instead of falling back to null: if the
        // stage name were ever wrong this fails with "key not found: next"
        // rather than an anonymous NullPointerException.
        val secondFail = pattern("next").head
        (secondFail.userId, secondFail.ip, secondFail.eventType)
      }
    )

    logFailDataStream.print()
    env.execute("LoginFailWithCep")
  }
}
四、输出结果
(1,192.168.0.3,fail)
(2,192.168.10.12,fail)