一、带窗口,将迟到元素输出到侧输出流
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.ProcessWindowFunction
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector
/**
 * Demo job: keyed 10-second event-time windows whose late records are diverted
 * to a side output instead of being dropped.
 *
 * Input is read as text lines from the socket `hadoop103:9999`. Each line is split on a
 * single space; the first field becomes the key and the second field, parsed as a `Long`
 * and multiplied by 1000, becomes the event timestamp (presumably seconds converted to
 * the milliseconds Flink works with).
 */
object LateElementToSideOutput {

  /**
   * Builds and runs the pipeline. Both the main window output and the late-record side
   * output are printed.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.setParallelism(1)

    // One tag value is used both to register the side output and to read it back.
    val lateTag = new OutputTag[(String, Long)]("late")

    // Parse "<key> <timestamp>" lines into (key, timestamp * 1000) pairs.
    val parsed = env
      .socketTextStream("hadoop103", 9999, '\n')
      .map { line =>
        val fields = line.split(" ")
        (fields(0), fields(1).toLong * 1000)
      }

    val windowed = parsed
      .assignAscendingTimestamps(record => record._2)
      .keyBy(record => record._1)
      .timeWindow(Time.seconds(10))
      .sideOutputLateData(lateTag)
      .process(new CountFunction)

    windowed.print()
    windowed.getSideOutput(lateTag).print()
    env.execute()
  }

  /**
   * Window function that emits a single message naming the start and end of the window
   * each time it is invoked. The window's elements themselves are not inspected.
   */
  class CountFunction extends ProcessWindowFunction[(String, Long), String, String, TimeWindow] {

    /**
     * @param key      key of the window being evaluated (unused)
     * @param context  gives access to the window whose bounds are reported
     * @param elements records assigned to the window (unused)
     * @param out      collector receiving the message
     */
    override def process(
        key: String,
        context: Context,
        elements: Iterable[(String, Long)],
        out: Collector[String]): Unit = {
      val window = context.window
      out.collect(s"${window.getStart}到${window.getEnd}的窗口闭合了!")
    }
  }
}
二、对于不带窗口的流,将迟到元素输出到侧输出流
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector
/**
 * Demo job: detects late records on a stream that has no window operator and routes
 * them to a side output.
 *
 * Input is read as text lines from the socket `hadoop103:9999`. Each line is split on a
 * single space; the first field is kept as a label and the second field, parsed as a
 * `Long` and multiplied by 1000, is used as the event timestamp (presumably seconds
 * converted to milliseconds).
 */
object LateElementToSideOutputNonWindow {

  /**
   * Builds and runs the pipeline. Both the main output and the "late" side output are
   * printed.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    val lateToSideOutput = new LateToSideOutput

    val readings = env
      .socketTextStream("hadoop103", 9999, '\n')
      .map(line => {
        val arr = line.split(" ")
        (arr(0), arr(1).toLong * 1000L)
      })
      .assignAscendingTimestamps(_._2)
      .process(lateToSideOutput)

    readings.print()
    // Read the side output through the same tag the function writes to, so the tag id
    // is defined in exactly one place.
    readings.getSideOutput(lateToSideOutput.lateReadingOutput).print()
    env.execute()
  }

  /**
   * Compares each record's timestamp with the operator's current watermark and emits a
   * fixed message either to the main output (on time) or to the "late" side output.
   */
  class LateToSideOutput extends ProcessFunction[(String, Long), String] {

    /** Tag identifying the side output that receives the late-record messages. */
    val lateReadingOutput = new OutputTag[String]("late")

    /**
     * @param value the (label, timestamp) record being processed
     * @param ctx   gives access to the current watermark and to side outputs
     * @param out   collector for the main output
     */
    override def processElement(
        value: (String, Long),
        ctx: ProcessFunction[(String, Long), String]#Context,
        out: Collector[String]): Unit = {
      // A watermark t declares that no further records with timestamp <= t are expected,
      // so a record whose timestamp EQUALS the watermark is already late. The previous
      // strict `<` comparison let that boundary record through as on time.
      if (value._2 <= ctx.timerService().currentWatermark()) {
        ctx.output(lateReadingOutput, "迟到事件来了!")
      } else {
        out.collect("没有迟到的事件来了!")
      }
    }
  }
}
三、对于迟到元素,等待一段时间,更新窗口计算结果
import org.apache.flink.api.common.state.ValueStateDescriptor
import org.apache.flink.api.scala.typeutils.Types
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.ProcessWindowFunction
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector
/**
 * Demo job: keyed 5-second event-time windows that stay open for another 5 seconds of
 * allowed lateness, so late records re-trigger the window and produce an updated result.
 *
 * Input is read as text lines from the socket `hadoop103:9999`. Each line is split on a
 * single space; the first field becomes the key and the second field, parsed as a `Long`
 * and multiplied by 1000, becomes the event timestamp (presumably seconds converted to
 * milliseconds). Watermarks tolerate 5 seconds of out-of-orderness.
 */
object UpdateWindowResultWithLateElement {

  /**
   * Builds and runs the pipeline and prints the window results.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.setParallelism(1)

    val stream = env
      .socketTextStream("hadoop103", 9999, '\n')
      .map(line => {
        val arr = line.split(" ")
        (arr(0), arr(1).toLong * 1000L)
      })
      .assignTimestampsAndWatermarks(
        new BoundedOutOfOrdernessTimestampExtractor[(String, Long)](Time.seconds(5)) {
          override def extractTimestamp(element: (String, Long)): Long = element._2
        }
      )
      .keyBy(_._1)
      .timeWindow(Time.seconds(5))
      .allowedLateness(Time.seconds(5))
      .process(new UpdatingWindowCountFunction)

    stream.print()
    env.execute()
  }

  /**
   * Emits the element count of a window on every firing, using a per-window flag to
   * distinguish the first firing from later firings caused by late records.
   */
  class UpdatingWindowCountFunction extends ProcessWindowFunction[(String, Long), String, String, TimeWindow] {

    // Shared by process() and clear() so both address the same per-window state.
    private val isUpdateDescriptor =
      new ValueStateDescriptor[Boolean]("is-update", Types.of[Boolean])

    /**
     * @param key      key of the window being evaluated (unused)
     * @param context  gives access to the per-window state holding the flag
     * @param elements all records currently held by the window
     * @param out      collector receiving the result message
     */
    override def process(
        key: String,
        context: Context,
        elements: Iterable[(String, Long)],
        out: Collector[String]): Unit = {
      val count = elements.size
      val isUpdate = context.windowState.getState(isUpdateDescriptor)

      // An unset state yields null, which Scala unboxes to `false`, so the first firing
      // of a window takes this branch.
      if (!isUpdate.value()) {
        out.collect("当水位线超过窗口结束时间的时候,窗口第一次触发计算!元素数量是 " + count + " 个!")
        isUpdate.update(true)
      } else {
        out.collect("迟到元素来了!元素数量是 " + count + " 个!")
      }
    }

    /**
     * Releases the per-window flag when the window is purged. Without this override the
     * "is-update" entry of every window would be left behind in state.
     *
     * @param context context of the window being cleared
     */
    override def clear(context: Context): Unit =
      context.windowState.getState(isUpdateDescriptor).clear()
  }
}