chapter07

EventTimeTimerTest

(事件时间定时器测试)

package com.liao.chapter07

import com.liao.chapter05.{ClickSource, Event}
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.KeyedProcessFunction
import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

/**
 * Demo job for event-time timers.
 *
 * Reads a short, hand-written sequence of events from [[EventTimeTimerTest.CustomSource]],
 * uses each event's `timestamp` field as its event time, and for every element
 * prints the current watermark and registers an event-time timer 5000 ms after
 * that watermark. Every timer firing is printed as well.
 */
object EventTimeTimerTest {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val stream = env.addSource(new CustomSource)
      .assignAscendingTimestamps(_.timestamp)

    // Every record is mapped to the same constant key, so a single keyed
    // context (and a single set of timers) handles the whole stream.
    stream.keyBy(_ => true)
      .process(new KeyedProcessFunction[Boolean, Event, String] {
        override def processElement(i: Event, context: KeyedProcessFunction[Boolean, Event, String]#Context, collector: Collector[String]): Unit = {
          // NOTE: the base time is the operator's current watermark, not the
          // element's own timestamp.
          val currentTime = context.timerService().currentWatermark()
          collector.collect(s"数据到达,当前时间是:$currentTime, 当前数据时间戳是: ${i.timestamp}")
          // Register a timer 5 seconds (in event time) after the current watermark.
          context.timerService().registerEventTimeTimer(currentTime + 5 * 1000)
        }

        // Logic executed when a registered timer fires.
        override def onTimer(timestamp: Long, ctx: KeyedProcessFunction[Boolean, Event, String]#OnTimerContext, out: Collector[String]): Unit = {
          out.collect("定时器触发,触发时间为:" + timestamp)
        }
      })
      .print()

    env.execute()

  }

  /**
   * Test source that emits three fixed "Mary" events with timestamps
   * 1000, 2000 and 6000, sleeping 5 seconds (wall-clock) after each one.
   *
   * Cancellation is cooperative: [[cancel]] flips a flag that is checked
   * before each emission, so no further events are produced once the job
   * has been cancelled.
   */
  class CustomSource extends SourceFunction[Event] {

    // Written by cancel() (possibly from another thread), read by run().
    @volatile private var running = true

    override def run(sourceContext: SourceFunction.SourceContext[Event]): Unit = {
      // Event timestamps to emit, in order.
      val timestamps = Iterator(1000L, 2000L, 6000L)
      while (running && timestamps.hasNext) {
        sourceContext.collect(Event("Mary", "./home", timestamps.next()))
        // Pause 5 seconds before the next emission (or before finishing).
        Thread.sleep(5000)
      }
    }

    // Previously `???`, which threw NotImplementedError when the job was cancelled.
    override def cancel(): Unit = running = false
  }
}

ProcessFunctionTest

package com.liao.chapter07

import com.liao.chapter05.{ClickSource, Event}
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

/**
 * Demo job for a plain (non-keyed) `ProcessFunction`.
 *
 * For each click event: emits the user name when the user is "Mary", emits the
 * user name followed by the URL when the user is "Bob", and emits nothing for
 * anyone else. For every element it also prints the subtask index and the
 * current watermark to stdout.
 */
object ProcessFunctionTest {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // ClickSource is defined in chapter05 (not shown here); each event's
    // `timestamp` field is used as its event time.
    val stream = env.addSource(new ClickSource)
      .assignAscendingTimestamps(_.timestamp)


    stream.process(new ProcessFunction[Event, String] {
      override def processElement(i: Event, context: ProcessFunction[Event, String]#Context, collector: Collector[String]): Unit = {
        // Pattern match instead of chained `.equals` calls; a null user simply
        // falls through to the default case.
        i.user match {
          case "Mary" =>
            collector.collect(i.user)
          case "Bob" =>
            collector.collect(i.user)
            collector.collect(i.url)
          case _ => // other users produce no output
        }
        // Diagnostic output, printed for every element regardless of user.
        println(getRuntimeContext.getIndexOfThisSubtask)
        println(context.timerService().currentWatermark())
      }
    })
      .print()

    env.execute()

  }
}

ProcessingTimeTest

(处理时间测试)

package com.liao.chapter07

import com.liao.chapter05.{ClickSource, Event}
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.KeyedProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

/**
 * Demo job for processing-time timers.
 *
 * For every incoming click event the keyed process function prints the current
 * processing time and registers a processing-time timer 5000 ms later; each
 * timer firing is printed with its trigger timestamp.
 */
object ProcessingTimeTest {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val clicks = env.addSource(new ClickSource)
      .assignAscendingTimestamps(_.timestamp)

    // Constant key: all events share one keyed context.
    val keyedClicks = clicks.keyBy(_ => true)

    val messages = keyedClicks.process(new KeyedProcessFunction[Boolean, Event, String] {
      override def processElement(event: Event, ctx: KeyedProcessFunction[Boolean, Event, String]#Context, out: Collector[String]): Unit = {
        val timers = ctx.timerService()
        val now = timers.currentProcessingTime()
        out.collect(s"数据到达,当前时间是:$now")
        // Schedule a timer 5 seconds after the current processing time.
        val fireAt = now + 5 * 1000
        timers.registerProcessingTimeTimer(fireAt)
      }

      // Invoked when a previously registered timer fires.
      override def onTimer(timestamp: Long, ctx: KeyedProcessFunction[Boolean, Event, String]#OnTimerContext, out: Collector[String]): Unit = {
        out.collect(s"定时器触发,触发时间为: $timestamp")
      }
    })

    messages.print()

    env.execute()

  }
}

TopNKeyedProcessFunctionExample

package com.liao.chapter07

import com.liao.chapter05.ClickSource
import com.liao.chapter06.UrlViewCount
import com.liao.chapter06.UrlViewCountExample.{UrlViewCountAgg, UrlViewCountResult}
import org.apache.flink.api.common.state.{ListState, ListStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.KeyedProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.util.Collector

import scala.collection.convert.ImplicitConversionsToScala.`iterable AsScalaIterable`
import scala.collection.mutable

/**
 * Demo job: top-N most visited URLs per sliding window.
 *
 * Step 1 counts views per URL in 10 s windows sliding every 5 s (incremental
 * aggregate plus window function, both defined in chapter06). Step 2 re-keys
 * the per-URL counts by window end and lets [[TopNKeyedProcessFunctionExample.TopN]]
 * collect, sort and print the top entries of each window.
 */
object TopNKeyedProcessFunctionExample {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val stream = env.addSource(new ClickSource)
      .assignAscendingTimestamps(_.timestamp)

    // Single definition of the window length, shared by the window assigner
    // and by TopN (which needs it to print the window start).
    val windowSize = Time.seconds(10)

    // 1. Count the visits of every url, combining an incremental aggregate
    //    function with a full-window function.
    val urlCountStream = stream.keyBy(_.url)
      .window(SlidingEventTimeWindows.of(windowSize, Time.seconds(5)))
      .aggregate(new UrlViewCountAgg, new UrlViewCountResult)


    // 2. Group by window, then sort and emit the top entries.
    val resultStream = urlCountStream.keyBy(_.windowEnd)
      .process(new TopN(2, windowSize.toMilliseconds))

    resultStream.print()


    env.execute()

  }

  /**
   * Custom keyed process function, keyed by window end, that buffers all
   * per-URL counts of one window and emits a formatted top-N report when the
   * window's timer fires.
   *
   * @param i            how many top entries to report
   * @param windowSizeMs window length in milliseconds, used only to derive the
   *                     window start shown in the report header (defaults to
   *                     10000, the value that was previously hard-coded)
   */
  class  TopN(i: Int, windowSizeMs: Long = 10000L) extends KeyedProcessFunction[Long,UrlViewCount,String]{

    // List state holding the counts received so far for the current key (window).
    var urlViewCountListState: ListState[UrlViewCount] = _

    override def open(parameters: Configuration): Unit = {
       urlViewCountListState = getRuntimeContext.getListState(new ListStateDescriptor[UrlViewCount]("list-state", classOf[UrlViewCount]))
    }

    override def processElement(value: UrlViewCount, context: KeyedProcessFunction[Long, UrlViewCount, String]#Context, collector: Collector[String]): Unit = {
      // Buffer every incoming count in the list state.
      urlViewCountListState.add(value)
      // Register a timer 1 ms after the window end.
      context.timerService().registerEventTimeTimer(value.windowEnd + 1)
    }

    override def onTimer(timestamp: Long, ctx: KeyedProcessFunction[Long, UrlViewCount, String]#OnTimerContext, out: Collector[String]): Unit ={
      // Materialize the buffered counts and keep the `i` largest.
      val topnList = urlViewCountListState.get().toList.sortBy(-_.count).take(i)

      // The window has been reported; drop its buffered counts so keyed state
      // does not keep growing with every window.
      urlViewCountListState.clear()

      // The timer was registered at windowEnd + 1.
      val windowEnd = timestamp - 1

      // Build the report.
      val result = new mutable.StringBuilder()
      result.append(s"===========窗口:${windowEnd - windowSizeMs} ~ ${windowEnd}==========\n")
      topnList.zipWithIndex.foreach { case (urlViewCount, rank) =>
        result.append(s"浏览量Top ${rank + 1} ")
          .append(s"url: ${urlViewCount.url}")
          .append(s"浏览量是:${urlViewCount.count} \n")
      }

      out.collect(result.toString())

    }

  }

}

TopNProcessAllWindowExample

package com.liao.chapter07


import com.liao.chapter05.ClickSource
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.ProcessAllWindowFunction
import org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector

import scala.collection.mutable

/**
 * Demo job: top-2 most visited URLs per sliding window, computed in a single
 * non-keyed all-window function.
 *
 * All URLs of a 10 s window (sliding every 5 s) are counted in memory, sorted
 * by descending count, and the two largest are emitted as one formatted string.
 */
object TopNProcessAllWindowExample {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val clicks = env.addSource(new ClickSource)
      .assignAscendingTimestamps(_.timestamp)

    // Only the url is needed; window the whole stream without keying.
    val urls = clicks.map(_.url)
    val windowedUrls = urls.windowAll(SlidingEventTimeWindows.of(Time.seconds(10), Time.seconds(5)))

    val reports = windowedUrls.process(new ProcessAllWindowFunction[String, String, TimeWindow] {
      override def process(context: Context, elements: Iterable[String], out: Collector[String]): Unit = {
        // 1. Count the visits per url (url -> count).
        val counts = mutable.Map[String, Long]()
        for (url <- elements) {
          val updated = counts.get(url).fold(1L)(_ + 1)
          counts.put(url, updated)
        }

        // 2. Sort by descending count and keep the first two.
        val topTwo = counts.toList.sortBy { case (_, count) => -count }.take(2)

        // 3. Format the header and one line per ranked url, then emit.
        val header = s"===========窗口:${context.window.getStart} ~ ${context.window.getEnd}==========\n"
        val lines = topTwo.zipWithIndex.map { case ((url, count), rank) =>
          s"浏览量Top ${rank + 1} url: ${url}浏览量是:${count} \n"
        }

        out.collect(header + lines.mkString)
      }
    })

    reports.print()

    env.execute()


  }
}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值