**Flink入门Demo:**侧输出流的使用,官方已经建议使用SideOutput取代split算子了
更多的细节请看代码注释
学习资料都来源于尚硅谷,开源免费,很感谢他们(戳我直达)。
数据源格式为(每行三个字段,以逗号分隔:传感器ID、时间戳(秒)、温度):
sensor_1, 1547718199, 35.80018327300259
sensor_1, 1547718210, 28.39839108328901
sensor_6, 1547718203, 15.40298439340308
sensor_6, 1547718211, 23.76213902108990
sensor_1, 1547718201, 34.12333412589598
sensor_7, 1547718202, 6.720945201171228
sensor_10, 1547718205, 38.1010676048934
sensor_10, 1547718206, 33.9334534654789
更多的细节请看代码注释
老生常谈,CSDN不支持Scala,看官请将就
import com.ck.readFrom.SensorReading
import org.apache.flink.api.common.restartstrategy.RestartStrategies
import org.apache.flink.streaming.api.CheckpointingMode
import org.apache.flink.streaming.api.environment.CheckpointConfig.ExternalizedCheckpointCleanup
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.util.Collector
/**
 * Flink demo: splits a stream of sensor readings into a main output and a side output.
 *
 * Lines of the form `id, timestamp, temperature` are read from a socket, parsed into
 * [[SensorReading]]s, given event-time timestamps, and passed through [[FreezingAlert]].
 * Both the main output and the "freezing alert" side output are printed.
 */
object SideOutputTest {

  /**
   * Builds and runs the streaming job.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // ---- Checkpointing -------------------------------------------------------------------
    env.enableCheckpointing(60000) // take a checkpoint every 60 000 ms (one per minute)
    env.getCheckpointConfig.setCheckpointingMode(CheckpointingMode.EXACTLY_ONCE) // consistency level
    env.getCheckpointConfig.setCheckpointTimeout(100000) // a checkpoint taking longer than this (ms) is discarded
    env.getCheckpointConfig.setFailOnCheckpointingErrors(true) // whether a checkpoint error fails the whole job
    env.getCheckpointConfig.setMaxConcurrentCheckpoints(2) // maximum number of checkpoints in flight at once
    // Minimum pause (ms) between two checkpoints.
    // NOTE(review): this may conflict with the max-concurrent setting above — confirm which
    // of the two is actually intended.
    env.getCheckpointConfig.setMinPauseBetweenCheckpoints(100)
    // Externalized checkpoints: DELETE_ON_CANCELLATION controls what happens to the checkpoint
    // when the job is cancelled manually.
    env.getCheckpointConfig.enableExternalizedCheckpoints(ExternalizedCheckpointCleanup.DELETE_ON_CANCELLATION)

    // ---- Restart strategy ----------------------------------------------------------------
    // Each setRestartStrategy call replaces the previous one, so only a single strategy is
    // configured here. A fixed-delay alternative (at most 3 attempts, 500 ms apart) would be:
    //   env.setRestartStrategy(RestartStrategies.fixedDelayRestart(3, 500))
    env.setRestartStrategy(RestartStrategies.failureRateRestart(
      3, // up to 3 restarts ...
      org.apache.flink.api.common.time.Time.seconds(500), // ... within a 500-second interval
      org.apache.flink.api.common.time.Time.seconds(10) // 10 seconds between restart attempts
    ))

    // ---- Source and parsing --------------------------------------------------------------
    val socketStream = env.socketTextStream("localhost", 7777)

    val stream: DataStream[SensorReading] = socketStream
      .map { line =>
        // NOTE(review): a line with fewer than three fields or with non-numeric values makes
        // this lambda throw, which fails the job; confirm the input is always well-formed.
        val fields = line.split(",")
        SensorReading(fields(0).trim, fields(1).trim.toLong, fields(2).trim.toDouble)
      }
      // Bounded out-of-orderness of 1 second; watermarks are generated periodically.
      .assignTimestampsAndWatermarks(
        new BoundedOutOfOrdernessTimestampExtractor[SensorReading](Time.seconds(1)) {
          // The reading's timestamp is multiplied by 1000 (seconds -> milliseconds).
          override def extractTimestamp(t: SensorReading): Long = t.timestamp * 1000
        })

    // ---- Processing and sinks ------------------------------------------------------------
    val processedStream = stream.process(new FreezingAlert())

    // Main output: readings that FreezingAlert did not divert to the side output.
    processedStream.print("processed data ")

    // Side output: alert messages. The tag id must match the one FreezingAlert emits to.
    processedStream.getSideOutput(new OutputTag[String]("freezing alert")).print("alert data")

    env.execute()
  }
}
/**
 * Diverts cold readings to a side output.
 *
 * A reading whose temperature is below 32.0 produces an alert message on the
 * "freezing alert" side output; every other reading is forwarded unchanged on the
 * main output.
 */
class FreezingAlert() extends ProcessFunction[SensorReading, SensorReading] {

  /** Tag identifying the side output that carries the alert messages. */
  lazy val alertOutput: OutputTag[String] = new OutputTag[String]("freezing alert")

  /**
   * Routes a single reading to either the main output or the alert side output.
   *
   * @param i         the incoming sensor reading
   * @param context   process-function context, used to emit to the side output
   * @param collector collector for the main output
   */
  override def processElement(
      i: SensorReading,
      context: ProcessFunction[SensorReading, SensorReading]#Context,
      collector: Collector[SensorReading]): Unit = {
    val isFreezing = i.temperature < 32.0
    if (!isFreezing) {
      collector.collect(i)
    } else {
      context.output(alertOutput, "freezing alert for " + i.id)
    }
  }
}