EventTimeTimerTest
(事件计时器测试)
package com.liao.chapter07
import com.liao.chapter05.{ClickSource, Event}
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.KeyedProcessFunction
import org.apache.flink.streaming.api.functions.source.SourceFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector
/**
 * Demo job that registers event-time timers from a keyed process function.
 *
 * A small hand-written source emits three events; for each event the job prints
 * the watermark seen by the operator together with the event's timestamp, and
 * registers an event-time timer 5 seconds after that watermark value.
 */
object EventTimeTimerTest {

  /** Builds and runs the streaming job. `args` is not used. */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val stream = env.addSource(new CustomSource)
      .assignAscendingTimestamps(_.timestamp)

    // Constant key: every event ends up under the same key (`true`).
    stream.keyBy(data => true)
      .process(new KeyedProcessFunction[Boolean, Event, String] {

        /** Reports the current watermark and schedules a timer relative to it. */
        override def processElement(
            i: Event,
            context: KeyedProcessFunction[Boolean, Event, String]#Context,
            collector: Collector[String]): Unit = {
          // Note: this is the operator's watermark, not the element's own timestamp.
          val currentTime = context.timerService().currentWatermark()
          collector.collect(s"数据到达,当前时间是:$currentTime, 当前数据时间戳是: ${i.timestamp}")
          context.timerService().registerEventTimeTimer(currentTime + 5 * 1000)
        }

        /** Emits a message containing the timestamp the timer was registered for. */
        override def onTimer(
            timestamp: Long,
            ctx: KeyedProcessFunction[Boolean, Event, String]#OnTimerContext,
            out: Collector[String]): Unit = {
          out.collect("定时器触发,触发时间为:" + timestamp)
        }
      })
      .print()

    env.execute()
  }

  /**
   * Finite test source: emits three "Mary" events with timestamps 1000, 2000 and
   * 6000, sleeping 5 seconds (wall-clock) after each one.
   */
  class CustomSource extends SourceFunction[Event] {

    override def run(sourceContext: SourceFunction.SourceContext[Event]): Unit = {
      sourceContext.collect(Event("Mary", "./home", 1000L))
      Thread.sleep(5000)
      sourceContext.collect(Event("Mary", "./home", 2000L))
      Thread.sleep(5000)
      sourceContext.collect(Event("Mary", "./home", 6000L))
      Thread.sleep(5000)
    }

    // `run` has no loop to interrupt, so cancellation is a no-op. This used to be
    // `???`, which throws NotImplementedError as soon as cancel() is invoked.
    override def cancel(): Unit = ()
  }
}
ProcessFunctionTest
package com.liao.chapter07
import com.liao.chapter05.{ClickSource, Event}
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector
/**
 * Demo job for a plain (non-keyed) `ProcessFunction`.
 *
 * Events from `ClickSource` are filtered by user name: "Mary" events emit the
 * user, "Bob" events emit the user followed by the url, everything else emits
 * nothing. For every event the subtask index and the current watermark are
 * printed to stdout.
 */
object ProcessFunctionTest {

  /** Builds and runs the streaming job. `args` is not used. */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val clicks = env
      .addSource(new ClickSource)
      .assignAscendingTimestamps(_.timestamp)

    val userFilter = new ProcessFunction[Event, String] {
      override def processElement(
          event: Event,
          ctx: ProcessFunction[Event, String]#Context,
          out: Collector[String]): Unit = {
        val user = event.user
        if (user.equals("Mary")) {
          out.collect(user)
        } else if (user.equals("Bob")) {
          // Bob produces two output records per input event.
          out.collect(user)
          out.collect(event.url)
        }

        // Diagnostics printed for every element, regardless of the user.
        println(getRuntimeContext.getIndexOfThisSubtask)
        println(ctx.timerService().currentWatermark())
      }
    }

    clicks.process(userFilter).print()
    env.execute()
  }
}
ProcessingTimeTest
(处理时间测试)
package com.liao.chapter07
import com.liao.chapter05.{ClickSource, Event}
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.KeyedProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector
/**
 * Demo job that registers processing-time timers from a keyed process function.
 *
 * For each incoming event the current processing time is emitted and a timer is
 * registered 5 seconds later; when a timer fires, its timestamp is emitted.
 */
object ProcessingTimeTest {

  /** Builds and runs the streaming job. `args` is not used. */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val clicks = env
      .addSource(new ClickSource)
      .assignAscendingTimestamps(_.timestamp)

    val timerFunction = new KeyedProcessFunction[Boolean, Event, String] {

      /** Emits the arrival time and schedules a processing-time timer 5 s ahead. */
      override def processElement(
          event: Event,
          ctx: KeyedProcessFunction[Boolean, Event, String]#Context,
          out: Collector[String]): Unit = {
        val timers = ctx.timerService()
        val now = timers.currentProcessingTime()
        out.collect(s"数据到达,当前时间是:$now")
        timers.registerProcessingTimeTimer(now + 5000L)
      }

      /** Emits the timestamp the fired timer was registered for. */
      override def onTimer(
          timestamp: Long,
          ctx: KeyedProcessFunction[Boolean, Event, String]#OnTimerContext,
          out: Collector[String]): Unit = {
        out.collect(s"定时器触发,触发时间为: $timestamp")
      }
    }

    // Constant key: every event ends up under the same key (`true`).
    clicks
      .keyBy(_ => true)
      .process(timerFunction)
      .print()

    env.execute()
  }
}
TopNKeyedProcessFunctionExample
package com.liao.chapter07
import com.liao.chapter05.ClickSource
import com.liao.chapter06.UrlViewCount
import com.liao.chapter06.UrlViewCountExample.{UrlViewCountAgg, UrlViewCountResult}
import org.apache.flink.api.common.state.{ListState, ListStateDescriptor}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.KeyedProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.util.Collector
import scala.collection.convert.ImplicitConversionsToScala.`iterable AsScalaIterable`
import scala.collection.mutable
/**
 * Top-N urls per sliding window, implemented with a `KeyedProcessFunction`.
 *
 * Step 1 counts views per url in 10 s / 5 s sliding event-time windows using the
 * aggregate and window functions imported from `UrlViewCountExample`.
 * Step 2 re-keys the per-url counts by `windowEnd` and lets [[TopN]] rank them.
 */
object TopNKeyedProcessFunctionExample {

  /** Builds and runs the streaming job. `args` is not used. */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val stream = env.addSource(new ClickSource)
      .assignAscendingTimestamps(_.timestamp)

    // Per-url view counts for each sliding window.
    val urlCountStream = stream.keyBy(_.url)
      .window(SlidingEventTimeWindows.of(Time.seconds(10), Time.seconds(5)))
      .aggregate(new UrlViewCountAgg, new UrlViewCountResult)

    // Group the counts belonging to the same window and rank them.
    val resultStream = urlCountStream.keyBy(_.windowEnd)
      .process(new TopN(2))

    resultStream.print()
    env.execute()
  }

  /**
   * Buffers every `UrlViewCount` of one window (the key is `windowEnd`) in list
   * state and, when the timer registered at `windowEnd + 1` fires, emits a
   * formatted report of the `i` entries with the highest count.
   *
   * @param i number of top entries to report
   */
  class TopN(i: Int) extends KeyedProcessFunction[Long, UrlViewCount, String] {

    // Assigned in open(); holds the counts collected so far for the current key.
    var urlViewCountListState: ListState[UrlViewCount] = _

    override def open(parameters: Configuration): Unit = {
      urlViewCountListState = getRuntimeContext.getListState(
        new ListStateDescriptor[UrlViewCount]("list-state", classOf[UrlViewCount]))
    }

    /** Stores the element and registers a timer at `windowEnd + 1`. */
    override def processElement(
        value: UrlViewCount,
        context: KeyedProcessFunction[Long, UrlViewCount, String]#Context,
        collector: Collector[String]): Unit = {
      urlViewCountListState.add(value)
      context.timerService().registerEventTimeTimer(value.windowEnd + 1)
    }

    /** Ranks the buffered counts, emits the report and releases the state. */
    override def onTimer(
        timestamp: Long,
        ctx: KeyedProcessFunction[Long, UrlViewCount, String]#OnTimerContext,
        out: Collector[String]): Unit = {
      val topnList = urlViewCountListState.get().toList.sortBy(-_.count).take(i)

      // The window for this key is finished: drop its buffered counts so that
      // state does not accumulate for every windowEnd ever seen.
      urlViewCountListState.clear()

      val result = new mutable.StringBuilder()
      // timestamp is windowEnd + 1; the start is derived by subtracting a fixed
      // 10000 ms, matching the 10-second window size configured in main.
      result.append(s"===========窗口:${timestamp - 1 - 10000} ~ ${timestamp-1}==========\n")
      topnList.zipWithIndex.foreach { case (urlViewCount, rank) =>
        result.append(s"浏览量Top ${rank + 1} ")
          .append(s"url: ${urlViewCount.url}")
          .append(s"浏览量是:${urlViewCount.count} \n")
      }
      out.collect(result.toString())
    }
  }
}
TopNProcessAllWindowExample
package com.liao.chapter07
import com.liao.chapter05.ClickSource
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.ProcessAllWindowFunction
import org.apache.flink.streaming.api.windowing.assigners.SlidingEventTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector
import scala.collection.mutable
/**
 * Top-2 urls per sliding window, implemented with a `ProcessAllWindowFunction`.
 *
 * All urls are collected into one non-keyed 10 s / 5 s sliding event-time
 * window; when the window is processed the urls are counted, ranked by count,
 * and the two most viewed ones are emitted as a formatted report.
 */
object TopNProcessAllWindowExample {

  /** Builds and runs the streaming job. `args` is not used. */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val clicks = env
      .addSource(new ClickSource)
      .assignAscendingTimestamps(_.timestamp)

    val topTwoPerWindow = new ProcessAllWindowFunction[String, String, TimeWindow] {
      override def process(context: Context, elements: Iterable[String], out: Collector[String]): Unit = {
        // Count occurrences of each url within the window.
        val urlCountMap = mutable.Map[String, Long]()
        elements.foreach { url =>
          urlCountMap.put(url, urlCountMap.getOrElse(url, 0L) + 1)
        }

        // Highest count first; keep the two leading entries.
        val ranked = urlCountMap.toList.sortBy { case (_, count) => -count }.take(2)

        val window = context.window
        val report = new mutable.StringBuilder()
        report.append(s"===========窗口:${window.getStart} ~ ${window.getEnd}==========\n")
        ranked.zipWithIndex.foreach { case ((url, count), rank) =>
          report.append(s"浏览量Top ${rank + 1} ")
          report.append(s"url: ${url}")
          report.append(s"浏览量是:${count} \n")
        }
        out.collect(report.toString())
      }
    }

    clicks
      .map(_.url)
      .windowAll(SlidingEventTimeWindows.of(Time.seconds(10), Time.seconds(5)))
      .process(topTwoPerWindow)
      .print()

    env.execute()
  }
}