// 对于次数的限制 (Example 1: limiting how many times a pattern must occur)
import org.apache.flink.cep.scala.CEP
import org.apache.flink.cep.scala.pattern.Pattern
import org.apache.flink.streaming.api.scala._
/**
 * Demonstrates the quantifiers of the Flink CEP pattern API.
 *
 * A fixed list of login events is keyed by user id and matched against a
 * single-stage pattern ("start") that accepts failed logins. The quantifier
 * applied to that stage decides how many failed logins form one match; the
 * events of every match are printed to stdout.
 */
object FlinkCepTest1 {

  /** Name of the only pattern stage; also the key used to read events out of a match. */
  private final val StartStage = "start"

  /**
   * A single login attempt.
   *
   * @param userId    id of the user who tried to log in (the stream is keyed by this field)
   * @param ip        address the attempt came from
   * @param eventType "fail" or "success" in the sample data below
   * @param weight    numeric value attached to the event; not used by the pattern
   */
  case class LoginEvent(userId: String, ip: String, eventType: String, weight: Int)

  /**
   * Builds the sample stream, applies the quantified pattern per user and prints every match.
   *
   * @param args command-line arguments (ignored)
   */
  def main(args: Array[String]): Unit = {
    // Set up the execution environment.
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Run with a single parallel instance.
    env.setParallelism(1)

    // Simulated data source.
    val loginEventStream: DataStream[LoginEvent] = env.fromCollection(
      List(
        LoginEvent("1", "192.168.0.1", "fail", 8),
        LoginEvent("1", "192.168.0.2", "fail", 9),
        LoginEvent("1", "192.168.0.3", "fail", 10),
        LoginEvent("1", "192.168.0.4", "fail", 10),
        LoginEvent("2", "192.168.10.10", "success", -1),
        LoginEvent("3", "192.168.10.10", "fail", 5),
        LoginEvent("3", "192.168.10.11", "fail", 6),
        LoginEvent("4", "192.168.10.10", "fail", 6),
        LoginEvent("4", "192.168.10.11", "fail", 7),
        LoginEvent("4", "192.168.10.12", "fail", 8),
        LoginEvent("5", "192.168.10.13", "success", 8),
        LoginEvent("5", "192.168.10.14", "success", 9),
        LoginEvent("5", "192.168.10.15", "success", 10),
        LoginEvent("6", "192.168.10.16", "fail", 6),
        LoginEvent("6", "192.168.10.17", "fail", 8),
        LoginEvent("6", "192.168.10.18", "fail", 8),
        LoginEvent("7", "192.168.10.18", "fail", 5),
        LoginEvent("6", "192.168.10.19", "fail", 10),
        LoginEvent("6", "192.168.10.19", "fail", 9)
      )
    )

    // Define the pattern. Swap the active quantifier for one of the commented
    // alternatives to compare their output (notes are the original author's observations):
    val loginFailPattern = Pattern.begin[LoginEvent](StartStage)
      .where(_.eventType == "fail") // a single failed login; `==` is null-safe, unlike `.equals`
      //.times(4)       // exactly four occurrences; with five failures the pattern matches twice
      //.times(2, 4)    // two, three or four occurrences; every qualifying combination is emitted
      //.timesOrMore(5) // five or more occurrences
      //.oneOrMore      // one or more occurrences; every qualifying combination is emitted
      .times(2).optional // groups of two occurrences are emitted; `optional` also allows the stage to be absent

    // Match per user.
    val loginFailDataPattern = CEP.pattern(loginEventStream.keyBy(_.userId), loginFailPattern)

    // Extract the events of each match. The stage is optional, so fall back to an
    // empty collection instead of failing with a MatchError if it is missing.
    val loginFailResult = loginFailDataPattern.select(
      (pattern: collection.Map[String, Iterable[LoginEvent]]) =>
        pattern.getOrElse(StartStage, Iterable.empty[LoginEvent])
    )

    // Print for inspection.
    loginFailResult.print()
    env.execute("FlinkCepTest1")
  }
}
// 短时间内次数的限制 (Example 2: limiting occurrences within a short time window)
import org.apache.flink.streaming.api.scala.StreamExecutionEnvironment
import org.apache.flink.api.scala._
import org.apache.flink.cep.scala.{CEP, PatternStream}
import org.apache.flink.cep.scala.pattern.Pattern
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.windowing.time.Time
/**
 * Detects users whose login fails twice in a row within 2 seconds.
 *
 * Repeated login failures in a short time may indicate an automated attack.
 * The pattern below matches a failed login that is immediately followed
 * (strict contiguity via `next`) by another failed login of the same user,
 * with both events inside a 2-second event-time window.
 */
object FlinkCepTest2 {

  /**
   * A single login attempt.
   *
   * @param userId    id of the user who tried to log in (the stream is keyed by this field)
   * @param ip        address the attempt came from
   * @param eventType "fail" or "success" in the sample data below
   * @param eventTime event timestamp in epoch seconds (multiplied by 1000 when assigned to Flink)
   */
  case class LoginEvent(userId: Int, ip: String, eventType: String, eventTime: Long)

  /**
   * Warning emitted for every matched pair of failed logins.
   *
   * @param firstUserId user id taken from the first failed login of the pair
   * @param fEventTime  `eventTime` of the first failed login
   * @param sEventTime  `eventTime` of the second failed login
   */
  case class LoginWarning(firstUserId: Int, fEventTime: Long, sEventTime: Long)

  /**
   * Returns the first event recorded for `stage` in a CEP match.
   *
   * Both stages of the pattern are mandatory, so a missing or empty stage means
   * the pattern and the select function are out of sync; fail with a descriptive
   * error rather than a NullPointerException.
   */
  private def firstEventOf(
      matched: collection.Map[String, Iterable[LoginEvent]],
      stage: String): LoginEvent =
    matched.get(stage).flatMap(_.headOption).getOrElse(
      throw new IllegalStateException(s"CEP match contains no event for stage '$stage'"))

  /**
   * Builds the sample stream, applies the pattern per user and prints a
   * [[LoginWarning]] for every match.
   *
   * @param args command-line arguments (ignored)
   */
  def main(args: Array[String]): Unit = {
    // Obtain the execution environment.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Process by event time. (The original author notes that processing time is the
    // default otherwise; this depends on the Flink version in use.)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    // Run with a single parallel instance.
    env.setParallelism(1)

    // Hand-written test data; trailing comments show the timestamps as wall-clock time (UTC+8).
    val loginStream = env.fromCollection(List(
      LoginEvent(1, "192.168.0.1", "fail", 1558430842), // 2019-05-21 17:27:22
      LoginEvent(1, "192.168.0.2", "success", 1558430843), // 2019-05-21 17:27:23
      LoginEvent(1, "192.168.0.3", "fail", 1558430844), // 2019-05-21 17:27:24
      LoginEvent(1, "192.168.0.3", "fail", 1558430847), // 2019-05-21 17:27:27
      LoginEvent(1, "192.168.0.3", "fail", 1558430848), // 2019-05-21 17:27:28
      LoginEvent(2, "192.168.10.10", "success", 1558430850) // 2019-05-21 17:27:30
    )).assignAscendingTimestamps(_.eventTime * 1000) // epoch seconds -> milliseconds

    // Define the pattern that is matched against the event stream.
    val loginFailPattern = Pattern.begin[LoginEvent]("begin")
      .where(_.eventType == "fail") // first failed login
      .next("next")
      .where(_.eventType == "fail") // directly followed by another failed login
      .within(Time.seconds(2)) // whole match must fit into 2 seconds (windowing `Time` import)

    // Apply the pattern to the stream, keyed by user.
    val patternStream: PatternStream[LoginEvent] =
      CEP.pattern(loginStream.keyBy(_.userId), loginFailPattern)

    // Turn every match into a warning.
    val result = patternStream.select((pattern: collection.Map[String, Iterable[LoginEvent]]) => {
      val firstFailure = firstEventOf(pattern, "begin")
      val secondFailure = firstEventOf(pattern, "next")
      LoginWarning(firstFailure.userId, firstFailure.eventTime, secondFailure.eventTime)
    })

    result.print()
    // Name the job, consistent with FlinkCepTest1.
    env.execute("FlinkCepTest2")
  }
}