Flink处理迟到数据

结合增量聚合函数和全窗口函数

import com.google.gson.{JsonObject, JsonParser}
import org.apache.flink.api.common.eventtime.{SerializableTimestampAssigner, WatermarkStrategy}
import org.apache.flink.api.common.functions.AggregateFunction
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer
import org.apache.flink.connector.kafka.source._
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.ProcessWindowFunction
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector

import java.time.Duration


/**
 * One purchase record as read from the Kafka "order" topic.
 *
 * @param pro_name product name; the stream is keyed by this field
 * @param price    value of the record's "price" JSON field
 * @param shoptime value of the record's "shoptime" JSON field, used as the event-time timestamp
 */
case class example(
  pro_name: String,
  price: Long,
  shoptime: Long
)
/**
 * Per-window purchase count for a single product.
 *
 * @param pro_name product name (the window key)
 * @param count    number of purchase records counted in the window
 * @param start    window start timestamp
 * @param end      window end timestamp
 */
case class outexample(
  pro_name: String,
  count: Long,
  start: Long,
  end: Long
)

/**
 * Counts how many times each product is purchased, per 5-second tumbling
 * event-time window, and demonstrates the three layers of late-data handling:
 *
 *  1. a bounded-out-of-orderness watermark (2 seconds),
 *  2. an allowed lateness on the window (1 minute), and
 *  3. a side output that receives records arriving after the allowed lateness.
 *
 * The count is computed with an incremental [[AggregateFunction]] combined with
 * a full-window [[ProcessWindowFunction]] that attaches the window bounds.
 */
object ProcessLateDataTest {

  /**
   * Job entry point: builds the pipeline and runs it.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    // Obtain the execution environment.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Parallelism 1 keeps the printed output in a single, readable sequence.
    env.setParallelism(1)

    // Build the Kafka source; only the record value is deserialized, as a String.
    val source: KafkaSource[String] = KafkaSource.builder[String]
      .setBootstrapServers("n1:9092")
      .setTopics("order")
      .setGroupId("group")
      .setStartingOffsets(OffsetsInitializer.latest())
      .setValueOnlyDeserializer(new SimpleStringSchema())
      .build()

    // Read the raw JSON strings. Watermarks are assigned later, once the
    // records have been parsed, so no strategy is attached at the source.
    val rawOrders: DataStream[String] =
      env.fromSource(source, WatermarkStrategy.noWatermarks[String](), "kafka")

    // Parse every record exactly once; both the "input" print and the
    // windowed aggregation consume this stream.
    val orders: DataStream[example] = rawOrders.map(json => parseOrder(json))

    // Tolerate up to 2 seconds of out-of-order events. The type argument is
    // given explicitly so the compiler does not have to infer it from the
    // chained withTimestampAssigner call.
    // NOTE(review): Flink event time is in epoch milliseconds — confirm that
    // "shoptime" is produced in that unit.
    val watermarkStrategy: WatermarkStrategy[example] = WatermarkStrategy
      .forBoundedOutOfOrderness[example](Duration.ofSeconds(2))
      .withTimestampAssigner(new SerializableTimestampAssigner[example] {
        override def extractTimestamp(order: example, recordTimestamp: Long): Long =
          order.shoptime
      })
    val timedOrders: DataStream[example] = orders.assignTimestampsAndWatermarks(watermarkStrategy)

    // Tag identifying the side output that receives late records.
    val output: OutputTag[example] = OutputTag[example]("lata-data")

    // Incremental aggregation combined with a full-window function that wraps
    // the count together with the window bounds.
    val result: DataStream[outexample] = timedOrders
      .keyBy(_.pro_name)
      .window(TumblingEventTimeWindows.of(Time.seconds(5)))
      // Keep window state for one more minute after the watermark passes the window end.
      .allowedLateness(Time.minutes(1))
      // Records arriving after the allowed lateness go to the side output.
      .sideOutputLateData(output)
      .aggregate(new Aggre, new Proce)

    result.print("result")
    orders.print("input")
    result.getSideOutput(output).print("lata-data")
    env.execute()
  }

  /**
   * Parses one JSON order record into an [[example]].
   *
   * Reads the "pro_name", "price" and "shoptime" fields; a record that is not
   * a JSON object or lacks one of these fields makes Gson throw.
   *
   * @param json raw record value read from Kafka
   * @return the parsed purchase record
   */
  private def parseOrder(json: String): example = {
    val fields: JsonObject = JsonParser.parseString(json).getAsJsonObject
    example(
      pro_name = fields.get("pro_name").getAsString,
      price = fields.get("price").getAsLong,
      shoptime = fields.get("shoptime").getAsLong
    )
  }

  /** Incremental aggregate that counts the records of a window. */
  class Aggre extends AggregateFunction[example, Long, Long] {
    /** Starts every window at a count of zero. */
    override def createAccumulator(): Long = 0L

    /** Adds one to the running count for each incoming record. */
    override def add(in: example, acc: Long): Long = acc + 1

    /** The accumulator already is the final count. */
    override def getResult(acc: Long): Long = acc

    /**
     * Combines two partial counts. Tumbling windows never merge, but a real
     * implementation keeps this function usable with merging window assigners
     * instead of failing with NotImplementedError.
     */
    override def merge(acc: Long, acc1: Long): Long = acc + acc1
  }

  /** Full-window function that wraps the pre-aggregated count with the window bounds. */
  class Proce extends ProcessWindowFunction[Long, outexample, String, TimeWindow] {
    /**
     * Emits one [[outexample]] for the fired window.
     *
     * @param pro_name key of the window (product name)
     * @param context  gives access to the window's start and end
     * @param elements result of the incremental aggregate for this window
     * @param out      collector for the wrapped result
     */
    override def process(pro_name: String, context: Context, elements: Iterable[Long], out: Collector[outexample]): Unit = {
      val window: TimeWindow = context.window
      // The pre-aggregation hands over the single count it produced.
      elements.headOption.foreach { count =>
        out.collect(outexample(pro_name, count, window.getStart, window.getEnd))
      }
    }
  }
}
  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值