计算商品流量数据

/**
 * One parsed line of the input access log.
 *
 * Instances are built in `word2.main` by splitting a log line on single spaces.
 *
 * @param ip        first space-separated field of the line
 * @param userId    second space-separated field of the line
 * @param eventTime event time in epoch milliseconds, parsed from the fourth field
 * @param method    sixth space-separated field (presumably the HTTP method — confirm against the log format)
 * @param url       seventh space-separated field; the job keys the stream by this value
 */
case class LogEvent(ip:String,userId:String,eventTime:Long,method:String,url:String)
/**
 * Result of aggregating one URL over one window.
 *
 * @param url       the key (URL) the window was computed for
 * @param windowEnd end timestamp of the window, as returned by `TimeWindow.getEnd`
 * @param count     number of `LogEvent`s counted for that URL in the window
 */
case class UrlViewCount(url:String,windowEnd:Long,count:Long)
/**
 * Streaming job that reads an access log, counts hits per URL over sliding
 * event-time windows and prints the five most-visited URLs of every window.
 */
object word2 {

  /** Log file that is read when no path is supplied on the command line. */
  private val DefaultInputPath = "C:\\Intel\\flinkcomm\\input\\apache.log"

  /**
   * Builds the pipeline and runs it.
   *
   * @param args optional; when present, `args(0)` is used as the input log path
   *             instead of [[DefaultInputPath]]
   */
  def main(args: Array[String]): Unit = {
    // Streaming (not batch) execution environment.
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Single parallel instance, so output is printed by one task.
    env.setParallelism(1)
    // Windows and timers are driven by the timestamps carried in the records.
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val inputPath = args.headOption.getOrElse(DefaultInputPath)

    env.readTextFile(inputPath)
      .map(data => {
        val dataArr = data.split(" ")
        // Fourth field holds the event time; convert it to epoch milliseconds.
        // SimpleDateFormat is not thread-safe, so a fresh instance is used per record.
        val timeStamp = new SimpleDateFormat("dd/MM/yyyy:HH:mm:ss").parse(dataArr(3).trim).getTime
        LogEvent(dataArr(0).trim, dataArr(1).trim, timeStamp, dataArr(5).trim, dataArr(6).trim)
      })
      // Watermarks trail the highest seen timestamp by 60 seconds.
      .assignTimestampsAndWatermarks(new BoundedOutOfOrdernessTimestampExtractor[LogEvent](Time.seconds(60)) {
        override def extractTimestamp(element: LogEvent): Long = element.eventTime
      })
      .keyBy(_.url)
      // One-minute windows that slide every five seconds.
      .timeWindow(Time.minutes(1), Time.seconds(5))
      .aggregate(new Count(), new Window())
      // Regroup by window so all URL counts of one window meet in the same key.
      .keyBy(_.windowEnd)
      .process(new Toppaix(5))
      .print()

    env.execute()
  }

}

/**
 * Incremental aggregate that counts the `LogEvent`s of a window.
 *
 * The accumulator and the result are both the running count.
 */
class Count() extends AggregateFunction[LogEvent, Long, Long] {

  /** A new window starts with a count of zero. */
  override def createAccumulator(): Long = {
    0L
  }

  /** Every incoming event raises the count by one; the event's content is not inspected. */
  override def add(in: LogEvent, acc: Long): Long = {
    acc + 1L
  }

  /** The final result is the accumulated count itself. */
  override def getResult(acc: Long): Long = {
    acc
  }

  /** Two partial counts are combined by adding them. */
  override def merge(acc: Long, acc1: Long): Long = {
    acc + acc1
  }
}

/**
 * Window function that turns the count of one keyed window into a
 * `UrlViewCount` carrying the key and the window's end timestamp.
 */
class Window() extends WindowFunction[Long, UrlViewCount, String, TimeWindow] {

  /**
   * Emits one `UrlViewCount` for the window.
   *
   * @param key    the URL this window is keyed by
   * @param window the window; its end timestamp is copied into the result
   * @param input  the aggregated values; only the first element is read
   * @param out    collector that receives the single result
   */
  override def apply(key: String, window: TimeWindow, input: Iterable[Long], out: Collector[UrlViewCount]): Unit = {
    val windowEnd = window.getEnd
    // Only the first element is consumed; an empty input would throw here.
    val total = input.iterator.next()
    val viewCount = UrlViewCount(key, windowEnd, total)
    out.collect(viewCount)
  }
}
/**
 * Buffers the per-URL counts that arrive for one key and, when the timer for
 * that key fires, emits a formatted ranking of the `i` URLs with the highest
 * counts.
 *
 * The job keys the incoming stream by `windowEnd`, so each key's list state
 * holds the counts belonging to a single window.
 *
 * @param i number of top entries to include in each emitted ranking
 */
class Toppaix(i: Int) extends KeyedProcessFunction[Long, UrlViewCount, String] {

  // Holds every UrlViewCount received for the current key until the timer fires.
  private var urlState: ListState[UrlViewCount] = _

  /** Registers the list state with the runtime before any element is processed. */
  override def open(parameters: Configuration): Unit = {
    import org.apache.flink.api.common.state.ListStateDescriptor

    urlState = getRuntimeContext.getListState(
      new ListStateDescriptor[UrlViewCount]("urlState", classOf[UrlViewCount]))
  }

  /**
   * Stores the element and schedules the ranking for its window.
   *
   * @param value one URL's count for one window
   * @param ctx   context giving access to the timer service
   * @param out   unused here; output is produced in `onTimer`
   */
  override def processElement(value: UrlViewCount, ctx: KeyedProcessFunction[Long, UrlViewCount, String]#Context, out: Collector[String]): Unit = {
    urlState.add(value)
    // Fire one millisecond after the window end. Every element of the same window
    // registers the same timestamp.
    ctx.timerService().registerEventTimeTimer(value.windowEnd + 1)
  }

  /**
   * Drains the buffered counts, ranks them by count (highest first) and emits
   * the top `i` as a single multi-line string.
   *
   * @param timestamp the timer's timestamp, i.e. window end + 1
   * @param ctx       timer context (not used)
   * @param out       collector receiving the formatted ranking
   */
  override def onTimer(timestamp: Long, ctx: KeyedProcessFunction[Long, UrlViewCount, String]#OnTimerContext, out: Collector[String]): Unit = {
    // Copy the state into a local buffer so the state can be released right away.
    val allUrlView = ListBuffer.empty[UrlViewCount]
    val iter = urlState.get().iterator()
    while (iter.hasNext) {
      allUrlView += iter.next()
    }
    urlState.clear()

    // Descending by count; honour the configured size instead of a fixed 5.
    val sortedUrlView = allUrlView.sortWith(_.count > _.count).take(i)

    val result = new StringBuilder
    // timestamp - 1 undoes the +1 added when the timer was registered.
    result.append("时间:").append(new Timestamp(timestamp - 1)).append("\n")

    for ((currentUrlView, index) <- sortedUrlView.zipWithIndex) {
      result.append("No").append(index + 1).append(":")
        .append("url=").append(currentUrlView.url)
        .append("流量=").append(currentUrlView.count).append("\n")
    }
    result.append("------------------------------")

    // No Thread.sleep here: blocking inside a timer callback stalls the operator.
    out.collect(result.toString())
  }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值