watermark 延迟策略：watermark = 当前最大事件时间 - 最大乱序程度；当 watermark >= 窗口结束时间 时，窗口触发计算并关闭
sensor_1,1547718199,35.8
sensor_6,1547718201,15.4
sensor_7,1547718202,6.7
sensor_10,1547718205,38.1
sensor_1,1547718129,29.8
sensor_1,1547718158,5.8
sensor_1,1547718140,40.8
sensor_1,1547718111,11.8
package com.water
/**
 * One temperature-sensor reading.
 *
 * @param id          sensor identifier
 * @param timestamp   event timestamp of the reading (unit not shown by any
 *                    usage of this class here — presumably epoch seconds; TODO confirm)
 * @param temperature measured temperature
 */
case class SensorReading4(
  id: String,
  timestamp: Long,
  temperature: Double
)
import org.apache.flink.api.common.functions.ReduceFunction
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time
/**
 * One temperature-sensor reading.
 *
 * @param id          sensor identifier
 * @param timestamp   event timestamp in seconds (the job multiplies it by 1000
 *                    to obtain the millisecond event time)
 * @param temperature measured temperature
 */
case class SensorReading6(
  id: String,
  timestamp: Long,
  temperature: Double
)
/**
 * Event-time windowing demo: reads `id,timestamp,temperature` lines from a
 * socket, assigns timestamps/watermarks tolerating 3 seconds of out-of-order
 * data, and reduces each sensor's readings over 15-second tumbling windows
 * with [[MyReducer]]. Records arriving after the allowed lateness are emitted
 * on a side output instead of being dropped.
 */
object WaterMark {
  import scala.util.Try

  /**
   * Parses one CSV line of the form `id,timestamp,temperature`.
   *
   * Fields beyond the third are ignored (as before). Blank or malformed lines
   * yield `None` instead of throwing, so a single bad socket line cannot fail
   * the whole streaming job.
   *
   * @param line raw text line read from the socket
   * @return the parsed reading, or `None` when the line cannot be parsed
   */
  private def parseReading(line: String): Option[SensorReading6] =
    line.split(",") match {
      case Array(id, ts, temp, _*) =>
        Try(SensorReading6(id, ts.trim.toLong, temp.trim.toDouble)).toOption
      case _ => None
    }

  /**
   * Builds and runs the streaming job.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.getConfig.setAutoWatermarkInterval(500) // watermark generation period (ms)

    val inputStream = env.socketTextStream("localhost", 7777)

    // 1. Convert each text line into the case class, skipping unparsable lines.
    val dataStream = inputStream
      .flatMap((line: String) => parseReading(line).toList)
      // .assignAscendingTimestamps(_.timestamp * 1000L) // use this when input is guaranteed to be in order
      // Input may be out of order: bound the maximum out-of-orderness to 3 seconds.
      .assignTimestampsAndWatermarks(
        new BoundedOutOfOrdernessTimestampExtractor[SensorReading6](Time.seconds(3)) {
          // Source timestamps are in seconds; event time must be in milliseconds.
          override def extractTimestamp(t: SensorReading6): Long = t.timestamp * 1000L
        })

    val lateTag = new OutputTag[SensorReading6]("late")

    // Every 15 seconds, reduce each sensor's readings in the window (minimum temperature).
    val resultStream = dataStream
      .keyBy(_.id)                       // typed key selector: group by sensor id
      .timeWindow(Time.seconds(15))      // shorthand for a tumbling window
      .allowedLateness(Time.minutes(1))  // 2. keep window state 1 more minute past the watermark for late data
      .sideOutputLateData(lateTag)       // 3. anything later than that goes to the side output
      .reduce(new MyReducer)

    resultStream.getSideOutput(lateTag).print("lateTag")
    resultStream.print()
    env.execute()
  }
}
/**
 * Combines two readings of the same key into one: keeps the first reading's
 * id, takes the second reading's timestamp, and keeps the lower of the two
 * temperatures.
 */
class MyReducer extends ReduceFunction[SensorReading6] {
  override def reduce(t1: SensorReading6, t2: SensorReading6): SensorReading6 = {
    val lowestTemperature = math.min(t1.temperature, t2.temperature)
    t1.copy(timestamp = t2.timestamp, temperature = lowestTemperature)
  }
}