/**
 * One parsed line of the access log.
 *
 * @param ip        first space-separated field of the log line
 * @param userId    second space-separated field of the log line
 * @param eventTime event timestamp in epoch milliseconds, parsed from the log line
 * @param method    request method field of the log line
 * @param url       requested URL field of the log line
 */
case class LogEvent(
  ip: String,
  userId: String,
  eventTime: Long,
  method: String,
  url: String
)
/**
 * Result of aggregating one URL over one window.
 *
 * @param url       the URL the count belongs to
 * @param windowEnd end timestamp of the window the count was computed for
 * @param count     number of events counted for the URL in that window
 */
case class UrlViewCount(
  url: String,
  windowEnd: Long,
  count: Long
)
/**
 * Flink streaming job that reads an access log, counts views per URL over
 * sliding event-time windows and prints a ranking of the most-viewed URLs
 * for every window.
 */
object word2 {

  /**
   * Builds and runs the job.
   *
   * @param args optional; when present, `args(0)` is the path of the log file
   *             to read. Without arguments the original hard-coded path is used.
   */
  def main(args: Array[String]): Unit = {
    // Create the streaming execution environment.
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Run everything with a parallelism of 1.
    env.setParallelism(1)
    // Windows and timers are driven by the timestamps carried in the records.
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // Read the log file line by line.
    val inputPath = args.headOption.getOrElse("C:\\Intel\\flinkcomm\\input\\apache.log")
    val stream = env.readTextFile(inputPath)

    stream
      .map(data => {
        // Fields are separated by single spaces:
        // 0 = ip, 1 = userId, 3 = timestamp, 5 = method, 6 = url.
        val fields = data.split(" ")
        // Convert the textual timestamp to epoch milliseconds.
        val timeStamp = new SimpleDateFormat("dd/MM/yyyy:HH:mm:ss").parse(fields(3).trim).getTime
        LogEvent(fields(0).trim, fields(1).trim, timeStamp, fields(5).trim, fields(6).trim)
      })
      // Use eventTime as the record timestamp; tolerate up to 60 seconds of out-of-order data.
      .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor[LogEvent](Time.seconds(60)) {
        override def extractTimestamp(element: LogEvent): Long = element.eventTime
      })
      .keyBy(_.url)
      // One-minute windows that slide every 5 seconds.
      .timeWindow(Time.minutes(1), Time.seconds(5))
      // Count events per URL and window, then attach the URL and the window end.
      .aggregate(new Count(), new Window())
      // Group the per-URL counts that belong to the same window.
      .keyBy(_.windowEnd)
      .process(new Toppaix(5))
      .print()

    env.execute()
  }
}
/**
 * Incremental aggregate that counts the [[LogEvent]]s it receives.
 *
 * The accumulator and the result are both the running count.
 */
class Count() extends AggregateFunction[LogEvent, Long, Long] {

  /** Starts every count at zero. */
  override def createAccumulator(): Long = 0L

  /** Adds one to the count for each incoming event; the event content is not inspected. */
  override def add(value: LogEvent, accumulator: Long): Long = {
    accumulator + 1
  }

  /** The result is the accumulated count itself. */
  override def getResult(accumulator: Long): Long = accumulator

  /** Combines two partial counts by summing them. */
  override def merge(a: Long, b: Long): Long = {
    a + b
  }
}
/**
 * Window function that turns the count received for a key and window into
 * a [[UrlViewCount]] carrying the key as URL and the window's end timestamp.
 */
class Window() extends WindowFunction[Long, UrlViewCount, String, TimeWindow] {

  /**
   * Emits one [[UrlViewCount]] for the given key and window.
   *
   * @param key    the key of the window; used as the URL of the result
   * @param window the window being evaluated; its end timestamp is stored in the result
   * @param input  counts for this key and window; only the first element is read
   * @param out    collector receiving the result
   */
  override def apply(key: String, window: TimeWindow, input: Iterable[Long], out: Collector[UrlViewCount]): Unit = {
    val viewCount = input.iterator.next()
    val windowEnd = window.getEnd
    out.collect(UrlViewCount(key, windowEnd, viewCount))
  }
}
/**
 * Collects the [[UrlViewCount]]s received for the current key and, when an
 * event-time timer fires, emits a formatted ranking of the URLs with the
 * highest counts.
 *
 * @param i maximum number of URLs listed in each emitted ranking
 */
class Toppaix(i: Int) extends KeyedProcessFunction[Long, UrlViewCount, String] {

  // List state holding every UrlViewCount received for the current key until the timer fires.
  private var urlState: ListState[UrlViewCount] = _

  /** Obtains the list state handle from the runtime context. */
  override def open(parameters: Configuration): Unit = {
    urlState = getRuntimeContext.getListState(
      new ListStateDescriptor[UrlViewCount]("urlState", classOf[UrlViewCount]))
  }

  /** Buffers the element and registers an event-time timer at `windowEnd + 1`. */
  override def processElement(value: UrlViewCount, ctx: KeyedProcessFunction[Long, UrlViewCount, String]#Context, out: Collector[String]): Unit = {
    // Every element is stored in state.
    urlState.add(value)
    // The timer is set 1 ms after the window end carried by the element.
    ctx.timerService().registerEventTimeTimer(value.windowEnd + 1)
  }

  /**
   * Sorts the buffered counts in descending order, keeps at most `i` of them,
   * and emits them as one multi-line string. The buffered state is cleared.
   */
  override def onTimer(timestamp: Long, ctx: KeyedProcessFunction[Long, UrlViewCount, String]#OnTimerContext, out: Collector[String]): Unit = {
    // Copy the state into a local buffer so it can be cleared before sorting.
    val buffered: ListBuffer[UrlViewCount] = new ListBuffer[UrlViewCount]()
    val iter = urlState.get().iterator()
    while (iter.hasNext) {
      buffered += iter.next()
    }
    urlState.clear()

    // Highest count first, limited to the configured number of entries.
    val topUrls = buffered.sortWith(_.count > _.count).take(i)

    // Format the ranking as a single string.
    val result = new StringBuilder
    // The timer was registered at windowEnd + 1, so subtract 1 to print the window end.
    result.append("时间:").append(new Timestamp(timestamp - 1)).append("\n")
    for ((view, rank) <- topUrls.zipWithIndex) {
      result.append("No").append(rank + 1).append(":")
        .append("url=").append(view.url)
        .append("流量=").append(view.count).append("\n")
    }
    result.append("------------------------------")
    out.collect(result.toString())
  }
}