热门商品统计：读取本地文件并通过 Kafka 发送数据

最新推荐文章于 2021-01-01 23:23:20 发布

yiyiqi123

最新推荐文章于 2021-01-01 23:23:20 发布

阅读量203

点赞数

分类专栏：随机　文章标签：flink

本文链接：https://blog.csdn.net/yiyiqi123/article/details/105498224

版权

随机专栏收录该内容

21 篇文章 0 订阅

订阅专栏

/**
 * One user-behavior record with five fields.
 *
 * @param userId     id of the user
 * @param itemId     id of the item
 * @param categoryId id of the item's category
 * @param behavior   behavior label (the job in this file filters on "pv")
 * @param timestamp  event timestamp; the job multiplies it by 1000 before using it as the
 *                   event time, so it is presumably in epoch seconds -- TODO confirm
 */
case class UserBehavior(
    userId: Long,
    itemId: Long,
    categoryId: Int,
    behavior: String,
    timestamp: Long
)
/**
 * Result of one window aggregation: the count for a single item in a single window.
 *
 * @param itemId    id of the item
 * @param windowEnd end timestamp of the window the count belongs to
 * @param count     number of records counted for the item in that window
 */
case class ItemViewCount(
    itemId: Long,
    windowEnd: Long,
    count: Long
)
/**
 * Entry point of the "hot items" streaming job, plus a helper that publishes the same
 * input file to Kafka line by line.
 *
 * NOTE(review): the imports for the Flink, Kafka, `java.util.Properties` and
 * `scala.io.Source` names used here are not visible in this excerpt -- confirm they exist.
 */
object word1 {

  // Local CSV file read by both the streaming job and the Kafka writer.
  // Backslashes are escaped so the literal is a valid Scala string.
  private val InputPath = "C:\\Intel\\flinkcomm\\input\\UserBehavior.csv"

  /**
   * Builds and runs the job: parse the CSV into [[UserBehavior]] records, keep only "pv"
   * events, count them per item in sliding event-time windows, then emit a top-5 report
   * per window end.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Uncomment to publish the input file to Kafka first.
    // writeData("Kafkaflink")

    // Create the streaming execution environment.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Run with a single parallel instance.
    env.setParallelism(1)
    // Use event time as the time characteristic.
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // Read the file and parse each comma-separated line into a UserBehavior.
    val behaviors: DataStream[UserBehavior] = env
      .readTextFile(InputPath)
      .map { line =>
        val fields: Array[String] = line.split(",")
        UserBehavior(
          fields(0).trim.toLong,
          fields(1).trim.toLong,
          fields(2).trim.toInt,
          fields(3).trim,
          fields(4).trim.toLong)
      }
      // The record timestamp is scaled by 1000 before being used as the event time.
      .assignAscendingTimestamps(_.timestamp * 1000L)

    val topItems: DataStream[String] = behaviors
      .filter(_.behavior == "pv")
      .keyBy(_.itemId)
      // NOTE(review): the window size (1 minute) is smaller than the slide (5 minutes);
      // this looks unintended -- confirm the intended size.
      .timeWindow(Time.minutes(1), Time.minutes(5))
      .aggregate(new CountAgg(), new WindowResult())
      // Regroup the per-item counts by window end so each window can be ranked.
      .keyBy(_.windowEnd)
      .process(new TopNHotItems(5))

    topItems.print()
    env.execute()
  }

  /**
   * Sends every line of the input file to the given Kafka topic as a String record.
   *
   * The file handle and the producer are both closed even if reading or sending fails.
   *
   * @param topic name of the Kafka topic to write to
   */
  def writeData(topic: String): Unit = {
    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "hdp111:9092")
    properties.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    properties.setProperty("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    // NOTE(review): group.id is normally a consumer setting; probably unnecessary for a
    // producer -- confirm before removing.
    properties.setProperty("group.id", "test")

    val producer = new KafkaProducer[String, String](properties)
    try {
      // Open the data source.
      val source = Source.fromFile(InputPath)
      try {
        for (line <- source.getLines()) {
          // Publish the line as the record value, without a key.
          producer.send(new ProducerRecord[String, String](topic, line))
        }
      } finally {
        source.close()
      }
    } finally {
      // Release the producer.
      producer.close()
    }
  }

}
/**
 * Custom aggregate function that counts [[UserBehavior]] records.
 *
 * The accumulator and the result are both a plain `Long` count.
 */
class CountAgg() extends AggregateFunction[UserBehavior, Long, Long] {

  /** Every count starts at zero. */
  override def createAccumulator(): Long = 0L

  /** Adds one per incoming record; the record's fields are not inspected. */
  override def add(in: UserBehavior, acc: Long): Long = acc + 1L

  /** The accumulated count is returned unchanged as the result. */
  override def getResult(acc: Long): Long = acc

  /** Combines two partial counts by summing them. */
  override def merge(acc: Long, acc1: Long): Long = acc + acc1
}
/**
 * Custom window function that wraps a count into an [[ItemViewCount]] carrying the key
 * and the window's end timestamp.
 */
class WindowResult() extends WindowFunction[Long, ItemViewCount, Long, TimeWindow] {

  /**
   * Emits one [[ItemViewCount]] for the given key and window.
   *
   * @param key    key of the window (used as the item id)
   * @param window the window being evaluated; only its end timestamp is read
   * @param input  values for this window; only the first one is read
   * @param out    collector that receives the result
   */
  override def apply(
      key: Long,
      window: TimeWindow,
      input: Iterable[Long],
      out: Collector[ItemViewCount]): Unit = {
    val windowEnd = window.getEnd
    val viewCount = input.iterator.next()
    out.collect(ItemViewCount(key, windowEnd, viewCount))
  }
}
/**
 * Buffers the [[ItemViewCount]] records of one key and, when an event-time timer fires,
 * emits a text report of the `topSize` records with the highest counts.
 *
 * @param topSize maximum number of items listed in each report
 */
class TopNHotItems(topSize: Int) extends KeyedProcessFunction[Long, ItemViewCount, String] {

  // Offset added to the window end when registering the timer, and subtracted again
  // when printing the time in the report.
  private val timerDelayMs = 100L

  // List state holding the records received so far; initialised in open().
  private var itemState: ListState[ItemViewCount] = _

  /** Obtains the list state named "item-state" from the runtime context. */
  override def open(parameters: Configuration): Unit = {
    itemState = getRuntimeContext.getListState(
      new ListStateDescriptor[ItemViewCount]("item-state", classOf[ItemViewCount]))
  }

  /**
   * Stores the incoming record and registers an event-time timer shortly after the end
   * of the record's window.
   */
  override def processElement(value: ItemViewCount, context: KeyedProcessFunction[Long, ItemViewCount, String]#Context, collector: Collector[String]): Unit = {
    // Save the record in state.
    itemState.add(value)
    // Register the timer.
    context.timerService().registerEventTimeTimer(value.windowEnd + timerDelayMs)
  }

  /**
   * Drains the buffered records, ranks them by count in descending order and emits the
   * formatted top-`topSize` report as a single string.
   */
  override def onTimer(timestamp: Long, ctx: KeyedProcessFunction[Long, ItemViewCount, String]#OnTimerContext, out: Collector[String]): Unit = {
    // Explicit Java-to-Scala conversion in place of the deprecated implicit JavaConversions.
    import scala.collection.JavaConverters._

    // Copy the buffered records out of state before clearing it.
    val allItems: List[ItemViewCount] = itemState.get().asScala.toList
    itemState.clear()

    // Highest count first, truncated to the requested size.
    val sortItems: List[ItemViewCount] =
      allItems.sortBy(_.count)(Ordering.Long.reverse).take(topSize)

    val result = new StringBuilder
    result.append("时间:").append(new Timestamp(timestamp - timerDelayMs)).append("\n")
    // One line per ranked item.
    sortItems.zipWithIndex.foreach { case (currentItem, i) =>
      result.append("No").append(i + 1).append(":")
        .append(" 商品iD=").append(currentItem.itemId)
        .append(" 浏览量=").append(currentItem.count)
        .append("\n")
    }
    result.append("------------------------------")

    // Emit immediately; this callback does not sleep, so the operator thread is not blocked.
    out.collect(result.toString())
  }
}

yiyiqi123

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
热门商品统计：读取本地文件并通过 Kafka 发送数据

//定义输出数据的样例类case class UserBehavior(userId:Long,itemId:Long,categoryId:Int,behavior:String,timestamp:Long)//定义窗口聚合结果样例类case class ItemViewCount(itemId:Long,windowEnd:Long,count:Long)object word1 {...
复制链接

扫一扫