/**
 * One user-behavior record, built from a single comma-separated input line.
 *
 * @param userId     identifier of the acting user
 * @param itemId     identifier of the item acted upon
 * @param categoryId identifier of the item's category
 * @param behavior   behavior type as text; the job in this file only counts "pv" records
 * @param timestamp  time of the event; the job multiplies it by 1000 before using it as the
 *                   event timestamp, so it is presumably in epoch seconds (TODO confirm)
 */
case class UserBehavior(
  userId: Long,
  itemId: Long,
  categoryId: Int,
  behavior: String,
  timestamp: Long
)
/**
 * Result of aggregating one item's events over one window.
 *
 * @param itemId    identifier of the item that was counted
 * @param windowEnd end timestamp of the window the count belongs to
 * @param count     number of events counted for the item in that window
 */
case class ItemViewCount(
  itemId: Long,
  windowEnd: Long,
  count: Long
)
/**
 * Hot-items streaming job.
 *
 * Reads user-behavior records from a CSV file, counts "pv" events per item in sliding
 * event-time windows and prints, for every window, the five items with the highest count.
 * Also contains a helper that replays the same CSV file into a Kafka topic.
 */
object word1 {

  // Sample data set shared by the Flink job and the Kafka loader. Backslashes are escaped
  // so that the Windows path is a valid Scala string literal.
  private val InputPath = "C:\\Intel\\flinkcomm\\input\\UserBehavior.csv"

  /**
   * Builds the pipeline and runs it until the input is exhausted.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    // Optionally replay the data set into Kafka first:
    // writeData("Kafkaflink")

    // Create the streaming execution environment.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Run every operator with a single parallel instance.
    env.setParallelism(1)
    // Windows and timers are driven by event time (timestamps carried by the records).
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // Parse each CSV line into a UserBehavior record.
    val datastream: DataStream[UserBehavior] = env.readTextFile(InputPath)
      .map(data => {
        val dataArray: Array[String] = data.split(",")
        UserBehavior(
          dataArray(0).trim.toLong,
          dataArray(1).trim.toLong,
          dataArray(2).trim.toInt,
          dataArray(3).trim,
          dataArray(4).trim.toLong)
      })
      // The record timestamp is scaled by 1000 to obtain the event timestamp
      // (presumably seconds -> milliseconds; assumes the file is ordered by time).
      .assignAscendingTimestamps(_.timestamp * 1000L)

    val value: DataStream[String] = datastream
      // Only page-view events take part in the ranking.
      .filter(_.behavior == "pv")
      .keyBy(_.itemId)
      // NOTE(review): the window size (1 minute) is smaller than the slide (5 minutes), so
      // events that fall between two windows are not counted. Confirm these values are
      // intended (e.g. a 1-hour window sliding every 5 minutes).
      .timeWindow(Time.minutes(1), Time.minutes(5))
      // Count per item and attach the window end to each count.
      .aggregate(new CountAgg(), new WindowResult())
      // Regroup by window so that all counts of one window are ranked together.
      .keyBy(_.windowEnd)
      .process(new TopNHotItems(5))

    value.print()
    env.execute()
  }

  /**
   * Sends every line of the sample CSV file as one record to a Kafka topic.
   *
   * The file handle and the producer are both released even when reading or sending fails.
   *
   * @param topic name of the Kafka topic that receives the lines
   */
  def writeData(topic: String): Unit = {
    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "hdp111:9092")
    properties.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    properties.setProperty("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    // NOTE(review): group.id is normally a consumer setting; kept from the original
    // configuration - confirm whether the producer needs it.
    properties.setProperty("group.id", "test")

    val producer = new KafkaProducer[String, String](properties)
    try {
      // Open the data source.
      val source = Source.fromFile(InputPath)
      try {
        for (elem <- source.getLines()) {
          // One CSV line becomes the value of one Kafka record (no key).
          producer.send(new ProducerRecord[String, String](topic, elem))
        }
      } finally {
        source.close()
      }
    } finally {
      // Closing the producer also flushes records that are still buffered.
      producer.close()
    }
  }
}
/**
 * Custom aggregate function that counts the [[UserBehavior]] records added to it.
 *
 * The accumulator and the result are the same running count; the content of the records
 * themselves is ignored.
 */
class CountAgg() extends AggregateFunction[UserBehavior, Long, Long] {

  /** Every count starts at zero. */
  override def createAccumulator(): Long = {
    0L
  }

  /** Adding a record increases the count by one, whatever the record contains. */
  override def add(in: UserBehavior, acc: Long): Long = {
    acc + 1L
  }

  /** The result is the accumulated count itself. */
  override def getResult(acc: Long): Long = {
    acc
  }

  /** Two partial counts are combined by summing them. */
  override def merge(acc: Long, acc1: Long): Long = {
    acc + acc1
  }
}
/**
 * Custom window function that wraps a pre-aggregated count into an [[ItemViewCount]].
 *
 * The key is used as the item id, the window's end as `windowEnd`, and the first element
 * of `input` as the count.
 */
class WindowResult() extends WindowFunction[Long, ItemViewCount, Long, TimeWindow] {

  /**
   * Emits exactly one [[ItemViewCount]] for the given key and window.
   *
   * @param key    key of the window (used as the item id)
   * @param window window whose end timestamp is attached to the result
   * @param input  values of the window; only the first one is read
   * @param out    collector receiving the result
   */
  override def apply(key: Long, window: TimeWindow, input: Iterable[Long], out: Collector[ItemViewCount]): Unit = {
    val windowEnd = window.getEnd
    val viewCount = input.iterator.next()
    val record = ItemViewCount(key, windowEnd, viewCount)
    out.collect(record)
  }
}
/**
 * Produces a formatted ranking of the items with the highest counts for each key.
 *
 * Incoming [[ItemViewCount]] records are buffered in keyed list state. An event-time timer
 * set 100 ms after the record's `windowEnd` then sorts the buffered records by count in
 * descending order and emits the first `topSize` of them as one text block.
 *
 * @param topSize maximum number of items listed per ranking
 */
class TopNHotItems(topSize: Int) extends KeyedProcessFunction[Long, ItemViewCount, String] {

  // Buffer of the counts received for the current key; initialised in open().
  private var itemState: ListState[ItemViewCount] = _

  /** Obtains the list state handle from the runtime context. */
  override def open(parameters: Configuration): Unit = {
    itemState = getRuntimeContext.getListState(
      new ListStateDescriptor[ItemViewCount]("item-state", classOf[ItemViewCount]))
  }

  /**
   * Buffers the record and schedules the ranking for its window.
   *
   * @param value     count of one item in one window
   * @param context   gives access to the timer service
   * @param collector not used here; output is produced in `onTimer`
   */
  override def processElement(value: ItemViewCount, context: KeyedProcessFunction[Long, ItemViewCount, String]#Context, collector: Collector[String]): Unit = {
    // Store the record in state.
    itemState.add(value)
    // Fire 100 ms (event time) after the window end.
    context.timerService().registerEventTimeTimer(value.windowEnd + 100)
  }

  /**
   * Ranks the buffered records and emits the result as a single string.
   *
   * @param timestamp firing time of the timer (window end + 100)
   * @param ctx       timer context (not used)
   * @param out       collector receiving the formatted ranking
   */
  override def onTimer(timestamp: Long, ctx: KeyedProcessFunction[Long, ItemViewCount, String]#OnTimerContext, out: Collector[String]): Unit = {
    // Explicit converters instead of the deprecated implicit JavaConversions.
    import scala.collection.JavaConverters._

    // Copy the buffered records out of state; a null result is treated as "no records"
    // in case the state backend reports empty state that way.
    val allItems: List[ItemViewCount] =
      Option(itemState.get()).map(_.asScala.toList).getOrElse(Nil)
    // The state is no longer needed once the records have been copied.
    itemState.clear()

    // Highest counts first, limited to the requested number of entries.
    val sortItems: List[ItemViewCount] =
      allItems.sortBy(_.count)(Ordering.Long.reverse).take(topSize)

    val result = new StringBuilder
    // Subtract the 100 ms timer offset again to print the window end itself.
    result.append("时间:").append(new Timestamp(timestamp - 100)).append("\n")
    // One line per ranked item.
    for ((currentItem, i) <- sortItems.zipWithIndex) {
      result.append("No").append(i + 1).append(":")
        .append(" 商品iD=").append(currentItem.itemId)
        .append(" 浏览量=").append(currentItem.count)
        .append("\n")
    }
    result.append("------------------------------")
    // Emitted immediately: sleeping inside the timer callback would stall the operator.
    out.collect(result.toString())
  }
}