*本文用到的数据为广告点击日志数据AdClickLog.csv,可自取
链接:https://pan.baidu.com/s/1C2CkWzwoM4fhcp3NrRnFJQ?pwd=fx11
提取码:fx11
案例1:判断同一个用户点击同一个广告的次数,超过阈值则报警
**创建样例类caseClass.scala,创建广告点击样例类
/**
 * One advertisement click record.
 *
 * @param user     id of the user who clicked
 * @param ad       id of the advertisement that was clicked
 * @param province province field of the record
 * @param city     city field of the record
 * @param time     timestamp field of the record (unit is not visible here — TODO confirm)
 */
case class AdCase(
  user: Long,
  ad: Long,
  province: String,
  city: String,
  time: Long
)
import org.apache.flink.api.scala._
import org.apache.flink.api.common.eventtime.WatermarkStrategy
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.api.common.state.{ValueState, ValueStateDescriptor}
import org.apache.flink.connector.kafka.source.KafkaSource
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer
import org.apache.flink.streaming.api.functions.KeyedProcessFunction
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.util.Collector
/**
 * Streaming job: reads comma-separated ad-click records from Kafka, groups them by
 * (user, ad) and emits a warning when the same user clicks the same ad more than
 * 100 times (treated as click fraud).
 */
object ClickWarn {

  /**
   * Parses one comma-separated record into an [[AdCase]].
   *
   * Expected layout: `user,ad,province,city,time`; fields beyond the fifth are ignored.
   * Numeric fields are trimmed before conversion.
   *
   * @param line raw record as read from Kafka
   * @return `Some(AdCase)` when the record has at least five fields and the numeric
   *         fields parse as `Long`; `None` otherwise
   */
  private def parseAdCase(line: String): Option[AdCase] =
    line.split(",") match {
      case fields if fields.length >= 5 =>
        // Try only catches non-fatal errors (here: NumberFormatException from toLong).
        scala.util.Try(
          AdCase(
            fields(0).trim.toLong,
            fields(1).trim.toLong,
            fields(2),
            fields(3),
            fields(4).trim.toLong
          )
        ).toOption
      case _ => None
    }

  /**
   * Builds and runs the pipeline: Kafka source -> parse -> keyBy(user, ad) -> AdWarn -> print.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment

    val kafkaConsumer = KafkaSource.builder[String]()
      // Kafka broker addresses
      .setBootstrapServers("192.168.126.51:9092,192.168.126.52:9092,192.168.126.53:9092")
      // consumer group id, distinguishes independent consumers of the same topic
      .setGroupId("paipai")
      // topic to read from
      .setTopics("flink")
      // start from the latest offsets rather than from the beginning of the topic
      .setStartingOffsets(OffsetsInitializer.latest())
      // deserialize only the record value, as a plain string
      .setValueOnlyDeserializer(new SimpleStringSchema())
      // build() is required to obtain the actual source
      .build()

    val lines: DataStream[String] = env
      .fromSource(kafkaConsumer, WatermarkStrategy.noWatermarks(), "kafka")

    lines
      // Malformed records (too few fields, non-numeric ids/timestamp) are skipped instead of
      // throwing inside the operator, which would fail the whole job on a single bad record.
      // NOTE(review): skipped records are dropped silently; add logging or a side output
      // if they need to be audited.
      .flatMap((line: String) => parseAdCase(line).toList)
      // group by user and advertisement
      .keyBy(x => (x.user, x.ad))
      .process(new AdWarn(100))
      .print()

    env.execute()
  }
}
/**
 * Emits a warning when the element count for one key exceeds a threshold.
 *
 * The stream is expected to be keyed by `(user, ad)`. Each incoming element increments a
 * per-key counter; once the counter is greater than `x`, a warning string is emitted and
 * the counter is cleared, so counting for that key starts over.
 *
 * @param x click-count threshold; a warning is emitted when the count becomes greater than `x`
 */
class AdWarn(x: Int) extends KeyedProcessFunction[(Long, Long), AdCase, String] {

  // Per-key state: number of clicks by the same user on the same ad since the last warning.
  // Declared lazy because the runtime context is only usable once the function is running.
  lazy val count: ValueState[Int] = getRuntimeContext
    .getState(new ValueStateDescriptor[Int]("count", classOf[Int]))

  /**
   * Called once per incoming element; updates the counter and emits a warning if needed.
   *
   * @param i         the click record being processed
   * @param context   process-function context (unused)
   * @param collector receives the warning message
   */
  override def processElement(i: AdCase,
                              context: KeyedProcessFunction[(Long, Long), AdCase, String]#Context,
                              collector: Collector[String]): Unit = {
    // Same read as the original `count.value()+1`; presumably an unset state reads as 0
    // here (null unboxed to Int) — TODO confirm.
    val clicks = count.value() + 1

    if (clicks > x) {
      // Report the configured threshold rather than a hard-coded "100", so the message
      // stays correct for any value of x.
      collector
        .collect(s"${i.user}用户点击${i.ad}广告超过${x}次,我们认为存在刷单行为,发出预警")
      // Reset after warning so counting restarts for this key.
      count.clear()
    } else {
      count.update(clicks)
    }
  }
}