// Combines an incremental aggregate function with a full-window function
import com.google.gson.{JsonObject, JsonParser}
import org.apache.flink.api.common.eventtime.{SerializableTimestampAssigner, WatermarkStrategy}
import org.apache.flink.api.common.functions.AggregateFunction
import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.connector.kafka.source.enumerator.initializer.OffsetsInitializer
import org.apache.flink.connector.kafka.source._
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.ProcessWindowFunction
import org.apache.flink.streaming.api.windowing.assigners.TumblingEventTimeWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.util.Collector
import java.time.Duration
/** One parsed order record: product name, price, and purchase timestamp (epoch millis). */
case class example(pro_name: String, price: Long, shoptime: Long)
/** Per-key window result: product name, purchase count, and the window's start/end timestamps. */
case class outexample(pro_name: String, count: Long, start: Long, end: Long)
// Counts the number of purchases per product
/** Flink job demonstrating an incremental aggregate ([[Aggre]]) combined with a
  * full-window function ([[Proce]]), with allowed lateness and a side output for
  * records that arrive too late to be included in their window.
  */
object ProcessLateDataTest {

  /** Parses one JSON order record into an [[example]].
    * Assumes fields "pro_name" (string), "price" (long) and "shoptime"
    * (long, presumably epoch millis — TODO confirm against the producer).
    */
  private def parseOrder(json: String): example = {
    val obj: JsonObject = JsonParser.parseString(json).getAsJsonObject
    example(
      obj.get("pro_name").getAsString,
      obj.get("price").getAsLong,
      obj.get("shoptime").getAsLong)
  }

  def main(args: Array[String]): Unit = {
    // Obtain the streaming execution environment.
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Parallelism 1 so the printed output is easy to follow in this demo.
    env.setParallelism(1)

    // Kafka source reading string records from topic "order", starting at the latest offset.
    val source: KafkaSource[String] = KafkaSource.builder[String]
      .setBootstrapServers("n1:9092")
      .setTopics("order")
      .setGroupId("group")
      .setStartingOffsets(OffsetsInitializer.latest())
      .setValueOnlyDeserializer(new SimpleStringSchema())
      .build()

    // Raw stream without watermarks (used below only to echo the input).
    val value: DataStream[String] = env.fromSource(source, WatermarkStrategy.noWatermarks(), "kafka")

    // Same stream with event-time watermarks: bounded out-of-orderness of 2 seconds,
    // event timestamp taken from the "shoptime" field of each JSON record.
    val value1: DataStream[String] = value.assignTimestampsAndWatermarks(
      WatermarkStrategy.forBoundedOutOfOrderness(Duration.ofSeconds(2))
        .withTimestampAssigner(new SerializableTimestampAssigner[String] {
          override def extractTimestamp(t: String, l: Long): Long =
            JsonParser.parseString(t).getAsJsonObject.get("shoptime").getAsLong
        })
    )

    // Parsed input stream, printed as-is so late records can be compared with window output.
    val stream: DataStream[example] = value.map(parseOrder _)

    // Side-output tag for records later than watermark + allowed lateness.
    // NOTE(review): "lata-data" looks like a typo for "late-data"; kept as-is because the
    // tag id and the print label below must stay consistent and may be referenced downstream.
    val output: OutputTag[example] = OutputTag[example]("lata-data")

    // Incremental count aggregation, wrapped by the full-window function that adds window bounds.
    val result = value1
      .map(parseOrder _)
      .keyBy(_.pro_name)
      .window(TumblingEventTimeWindows.of(Time.seconds(5)))
      // Keep window state for one extra minute so slightly-late records update the result.
      .allowedLateness(Time.minutes(1))
      // Records arriving after the allowed lateness go to the side output instead of being dropped.
      .sideOutputLateData(output)
      .aggregate(new Aggre, new Proce)

    result.print("result")
    stream.print("input")
    result.getSideOutput(output).print("lata-data")
    env.execute()
  }

  /** Incremental aggregate: counts elements per key and window.
    * Accumulator and result are both the running count.
    */
  class Aggre extends AggregateFunction[example, Long, Long] {
    override def createAccumulator(): Long = 0L
    override def add(in: example, acc: Long): Long = acc + 1
    override def getResult(acc: Long): Long = acc
    // FIX: was `???`, which throws NotImplementedError if Flink ever merges
    // accumulators (required e.g. for merging window assigners such as session
    // windows). Counts merge by addition.
    override def merge(acc: Long, acc1: Long): Long = acc + acc1
  }

  /** Full-window function: receives the pre-aggregated count (a single element)
    * and wraps it with the key and the window's start/end timestamps.
    */
  class Proce extends ProcessWindowFunction[Long, outexample, String, TimeWindow] {
    override def process(pro_name: String, context: Context, elements: Iterable[Long], out: Collector[outexample]): Unit = {
      // With aggregate(agg, windowFn) Flink hands exactly one element: the aggregate result.
      val count: Long = elements.iterator.next()
      out.collect(outexample(pro_name, count, context.window.getStart, context.window.getEnd))
    }
  }
}