Flink代码之处理迟到元素(七)

一、带窗口,将迟到元素输出到侧输出流

import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.ProcessWindowFunction
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector

/**
  * Demo job: keyed 10-second event-time windows whose late elements are
  * diverted to a side output instead of being dropped.
  *
  * Input lines are read from a socket and are expected to look like
  * "<key> <seconds>"; the second field is converted to milliseconds and
  * used as the event timestamp.
  */
object LateElementToSideOutput {

  /** Builds the pipeline, prints both the window results and the late elements, then runs it. */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.setParallelism(1)

    // One tag instance is used both to register the late-data side output
    // and to read it back afterwards.
    val lateTag = new OutputTag[(String, Long)]("late")

    // Parse "<key> <seconds>" and assign event timestamps (seconds -> millis).
    val timestamped = env
      .socketTextStream("hadoop103", 9999, '\n')
      .map { line =>
        val fields = line.split(" ")
        (fields(0), fields(1).toLong * 1000)
      }
      .assignAscendingTimestamps(_._2)
    // Alternative kept from the original author: bounded out-of-orderness of 1 ms.
    //      .assignTimestampsAndWatermarks(
    //        new BoundedOutOfOrdernessTimestampExtractor[(String, Long)](Time.milliseconds(1)) {
    //          override def extractTimestamp(element: (String, Long)): Long = element._2
    //        }
    //      )

    val windowResults = timestamped
      .keyBy(_._1)
      .timeWindow(Time.seconds(10))
      .sideOutputLateData(lateTag)
      .process(new CountFunction)

    windowResults.print()
    windowResults.getSideOutput(lateTag).print()

    env.execute()
  }

  /**
    * Window function that ignores the window contents and emits a single
    * message containing the start and end timestamps of the window it was
    * invoked for.
    */
  class CountFunction extends ProcessWindowFunction[(String, Long), String, String, TimeWindow] {
    override def process(key: String, context: Context, elements: Iterable[(String, Long)], out: Collector[String]): Unit = {
      val window = context.window
      out.collect(s"${window.getStart}到${window.getEnd}的窗口闭合了!")
    }
  }
}

二、不带窗口的流,将迟到元素输出到侧输出流

import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.util.Collector

/**
  * Demo job: detects late elements on a stream that has no window, by
  * comparing each element's timestamp with the current watermark inside a
  * ProcessFunction, and routes them to a side output.
  *
  * Input lines are read from a socket and are expected to look like
  * "<key> <seconds>"; the second field is converted to milliseconds and
  * used as the event timestamp.
  */
object LateElementToSideOutputNonWindow {

  /** Builds the pipeline, prints the main output and the "late" side output, then runs it. */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)

    // Parse "<key> <seconds>" into (key, timestamp in millis).
    val parsed = env
      .socketTextStream("hadoop103", 9999, '\n')
      .map { line =>
        val fields = line.split(" ")
        (fields(0), fields(1).toLong * 1000L)
      }

    val routed = parsed
      .assignAscendingTimestamps(_._2)
      .process(new LateToSideOutput)

    routed.print()
    // Must use the same id and element type as the tag inside LateToSideOutput.
    routed.getSideOutput(new OutputTag[String]("late")).print()

    env.execute()
  }

  /**
    * Emits a fixed message per element: to the "late" side output when the
    * element's timestamp is strictly below the current watermark, otherwise
    * to the main output.
    */
  class LateToSideOutput extends ProcessFunction[(String, Long), String] {
    val lateReadingOutput = new OutputTag[String]("late")

    override def processElement(value: (String, Long), ctx: ProcessFunction[(String, Long), String]#Context, out: Collector[String]): Unit = {
      val watermark = ctx.timerService().currentWatermark()
      value match {
        case (_, eventTime) if eventTime < watermark =>
          ctx.output(lateReadingOutput, "迟到事件来了!")
        case _ =>
          out.collect("没有迟到的事件来了!")
      }
    }
  }
}

三、对于迟到元素,等待一段时间,更新窗口计算结果

import org.apache.flink.api.common.state.ValueStateDescriptor
import org.apache.flink.api.scala.typeutils.Types
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.ProcessWindowFunction
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector

// Sample socket input, one "<key> <event time in seconds>" pair per line:
//a 1
//a 2
//a 1
//a 2
//a 4
//a 10
//a 1
//a 1
//a 15
//a 1
/**
  * Demo job: keeps a window alive for an extra allowed-lateness period so
  * that late elements update the already-emitted window result.
  *
  * Input lines are read from a socket and are expected to look like
  * "<key> <seconds>"; the second field is converted to milliseconds and
  * used as the event timestamp.
  */
object UpdateWindowResultWithLateElement {

  /** Builds the pipeline, prints the window results, then runs it. */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.setParallelism(1)

    val stream = env
      .socketTextStream("hadoop103", 9999, '\n')
      .map(line => {
        val arr = line.split(" ")
        (arr(0), arr(1).toLong * 1000L)
      })
      // Watermarks trail the largest seen timestamp by 5 seconds.
      .assignTimestampsAndWatermarks(
        new BoundedOutOfOrdernessTimestampExtractor[(String, Long)](Time.seconds(5)) {
          override def extractTimestamp(element: (String, Long)): Long = element._2
        }
      )
      .keyBy(_._1)
      .timeWindow(Time.seconds(5))
      // Keep each window (and its state) for 5 more seconds of event time
      // so late elements can re-trigger the computation.
      .allowedLateness(Time.seconds(5))
      .process(new UpdatingWindowCountFunction)

    stream.print()
    env.execute()
  }

  /**
    * Full-window function that reports the element count of the window.
    *
    * `process` is the window counterpart of `processElement`: instead of
    * being called once per element, it is called when the window fires.
    * Because the pipeline above configures allowed lateness, it may be
    * called again for the same window when a late element arrives. A
    * per-window boolean flag distinguishes the first firing from those
    * later updates.
    */
  class UpdatingWindowCountFunction extends ProcessWindowFunction[(String, Long), String, String, TimeWindow] {

    /**
      * Looks up the per-window "already fired" flag. The state is scoped to
      * the current key and window, so other windows do not see it.
      */
    private def firedFlag(context: Context) =
      context.windowState.getState(
        new ValueStateDescriptor[Boolean]("is-update", Types.of[Boolean])
      )

    /**
      * Emits the current element count, with a message that depends on
      * whether this window has fired before.
      *
      * @param key      key of the window being evaluated
      * @param context  gives access to the window and its per-window state
      * @param elements all elements currently held by the window
      * @param out      collector for the result message
      */
    override def process(key: String, context: Context, elements: Iterable[(String, Long)], out: Collector[String]): Unit = {
      val count = elements.size

      // The flag reads as false until it has been set (original author's note).
      val isUpdate = firedFlag(context)

      if (!isUpdate.value()) {
        out.collect(s"当水位线超过窗口结束时间的时候,窗口第一次触发计算!元素数量是 ${count} 个!")
        isUpdate.update(true)
      } else {
        // A late element arrived: publish the updated result for the window.
        out.collect(s"迟到元素来了!元素数量是 ${count} 个!")
      }
    }

    /**
      * Called when the window is purged. Per-window state is not removed
      * automatically, so the flag is dropped here; otherwise one entry per
      * (key, window) would accumulate in the state backend.
      */
    override def clear(context: Context): Unit = {
      firedFlag(context).clear()
    }
  }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值