目录
一、数据分流——侧输出流
(一)侧输出流全部输出
import org.apache.flink.streaming.api.scala._
import source.SensorReading
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.util.Collector
/**
* 侧输出流
*/
/**
 * Side-output demo: the main stream keeps "normal" readings while low/high
 * temperature readings are routed to tagged side outputs and printed.
 */
object SideOutPutTest {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // Read raw sensor lines and parse each into a SensorReading record.
    val sensorFile = "D:\\javaseprojects\\flinkstu\\resources\\sensor.txt"
    val readings: DataStream[SensorReading] = env
      .readTextFile(sensorFile)
      .map { line =>
        val fields = line.split(",")
        SensorReading(fields(0).trim, fields(1).trim.toLong, fields(2).trim.toDouble)
      }

    // Main output carries only the normal-range readings.
    val mainStream: DataStream[SensorReading] = readings.process(new SplitTempProcessor2)
    mainStream.print("normal")

    // Side outputs are fetched by tag; the tag id AND element type must match
    // the tags used inside the process function.
    val lowStream: DataStream[(String, Long, Double)] =
      mainStream.getSideOutput(new OutputTag[(String, Long, Double)]("low"))
    lowStream.print("low")
    val highStream: DataStream[(String, Long, Double)] =
      mainStream.getSideOutput(new OutputTag[(String, Long, Double)]("high"))
    highStream.print("high")

    env.execute("split test")
  }
}
// Above 37: high fever; below 35: low fever; in between: normal.
/**
 * Routes readings by temperature: [35, 37) stays on the main stream,
 * temperatures below 35 go to the "low" side output and all others
 * (>= 37) to the "high" side output, as (id, timestamp, temperature) tuples.
 */
class SplitTempProcessor2 extends ProcessFunction[SensorReading, SensorReading] {
  // Create each tag once instead of allocating a new OutputTag per record.
  private lazy val lowTag = new OutputTag[(String, Long, Double)]("low")
  private lazy val highTag = new OutputTag[(String, Long, Double)]("high")

  override def processElement(value: SensorReading,
                              ctx: ProcessFunction[SensorReading, SensorReading]#Context,
                              out: Collector[SensorReading]): Unit = {
    if (value.temperature >= 35 && value.temperature < 37) {
      out.collect(value) // normal reading: forward on the main stream
    } else if (value.temperature < 35) {
      ctx.output(lowTag, (value.id, value.timestamp, value.temperature))
    } else {
      ctx.output(highTag, (value.id, value.timestamp, value.temperature))
    }
  }
}
运行结果:
high> (sensor_1,1684201947,39.8)
low> (sensor_4,1684202000,17.7)
low> (sensor_3,1684202064,27.3)
low> (sensor_7,1684202068,13.8)
normal> SensorReading(sensor_1,1684201910,36.8)
high> (sensor_1,1684202012,44.7)
high> (sensor_1,1684201973,38.16)
high> (sensor_1,1684201973,38.16)
注意:
1.一个算子可以定义多个(≥1)侧输出流
2.自定义函数中发出侧输出流时使用的 OutputTag,与 tempStream.getSideOutput 中传入的 new OutputTag[(String, Long, Double)]("low") 必须在 id 和类型参数上完全一致;如果 id 不一致,getSideOutput 取不到对应的侧输出流,也就无法打印。
(二)修改侧输出流的输出结构
import source.SensorReading
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.util.Collector
/**
 * Side-output demo where each side output carries a different element type:
 * the "低烧" tag emits (timestamp, temperature) tuples while the "高烧" tag
 * emits bare Double temperatures.
 */
object TransformTest {
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // TODO load the data source and parse it into SensorReading records
    val sourcePath = "D:\\javaseprojects\\flinkstu\\resources\\sensor.txt"
    val readings: DataStream[SensorReading] = env
      .readTextFile(sourcePath)
      .map { line =>
        val cols = line.split(",")
        SensorReading(cols(0).trim, cols(1).trim.toLong, cols(2).trim.toDouble)
      }

    // TODO main stream: normal temperatures only
    val normalStream: DataStream[SensorReading] = readings.process(new MySplitFunction)
    normalStream.print("正常")

    // todo print the side outputs; each one has its own element type
    val lowFeverStream: DataStream[(Long, Double)] =
      normalStream.getSideOutput(new OutputTag[(Long, Double)]("低烧"))
    lowFeverStream.print("低烧")
    val highFeverStream: DataStream[Double] =
      normalStream.getSideOutput(new OutputTag[Double]("高烧"))
    highFeverStream.print("高烧")

    env.execute()
  }
}
// Normal: 35-37 (inclusive); below 35: low fever; above 37: high fever.
/**
 * Splits readings by temperature using differently-typed side outputs:
 * the main stream keeps SensorReading values in [35, 37], "低烧" receives
 * (timestamp, temperature) tuples, "高烧" receives the raw temperature.
 */
class MySplitFunction extends ProcessFunction[SensorReading, SensorReading] {
  // Reuse one tag instance per side output instead of allocating per record.
  private lazy val lowFeverTag = new OutputTag[(Long, Double)]("低烧")
  private lazy val highFeverTag = new OutputTag[Double]("高烧")

  override def processElement(value: SensorReading,
                              ctx: ProcessFunction[SensorReading, SensorReading]#Context,
                              out: Collector[SensorReading]): Unit = {
    if (value.temperature >= 35 && value.temperature <= 37) {
      out.collect(value) // normal reading stays on the main stream
    } else if (value.temperature < 35) {
      ctx.output(lowFeverTag, (value.timestamp, value.temperature))
    } else {
      ctx.output(highFeverTag, value.temperature)
    }
  }
}
运行结果:
高烧> 39.8
低烧> (1684202000,17.7)
低烧> (1684202064,27.3)
低烧> (1684202068,13.8)
正常> SensorReading(sensor_1,1684201910,36.8)
高烧> 44.7
高烧> 38.16
正常> SensorReading(sensor_1,1684201973,36.16)
二、合流
import source.SensorReading
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.util.Collector
// NOTE(review): fragment — tempStream2/tempStream3 are the side-output streams
// from the earlier example; this snippet does not compile on its own.
object TransformTest {
def main(args: Array[String]): Unit = {
// TODO merging streams
// TODO method 1: when the two streams have the SAME element type, merge them with union
val unionStream: DataStream[(String, Long, Double)] = tempStream2.union(tempStream3)
unionStream.print("不正常")
运行结果:
不正常> (sensor_4,1684202000,17.7)
不正常> (sensor_3,1684202064,27.3)
不正常> (sensor_7,1684202068,13.8)
不正常> (sensor_1,1684201947,39.8)
不正常> (sensor_1,1684202012,44.7)
不正常> (sensor_1,1684201973,38.16)
// TODO method 2: when the two streams have DIFFERENT element types, merge them with connect
val connectedStream: ConnectedStreams[SensorReading, (String, Long, Double)] = tempStream.connect(tempStream2)
// map on ConnectedStreams takes one function per input stream;
// both functions must produce the same output type.
val connectMapStream: DataStream[(String, Long, Double)] = connectedStream.map(
data1 => {
// first input: SensorReading, flattened into a tuple
(data1.id, data1.timestamp, data1.temperature)
},
data2 => {
// second input: already a tuple, passed through unchanged
(data2._1, data2._2, data2._3)
}
)
connectMapStream.print()
运行结果:
(sensor_1,1684201910,36.8)
(sensor_4,1684202000,17.7)
(sensor_1,1684201973,36.16)
(sensor_3,1684202064,27.3)
(sensor_7,1684202068,13.8)
// Same connect pattern, this time pairing the main stream with the "high" side output.
val connectedStream1: ConnectedStreams[SensorReading, (String, Long, Double)] = tempStream.connect(tempStream3)
val connectMapStream1: DataStream[(String, Long, Double)] = connectedStream1.map(
data1 => {
// SensorReading from the main stream, flattened into a tuple
(data1.id, data1.timestamp, data1.temperature)
}
,
data2 => {
// tuple from the side output, passed through unchanged
(data2._1, data2._2, data2._3)
}
)
connectMapStream1.print()
运行结果:
(sensor_1,1684201910,36.8)
(sensor_1,1684201947,39.8)
(sensor_1,1684201973,36.16)
(sensor_1,1684202012,44.7)
(sensor_1,1684201973,38.16)
+++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
// todo return a three-element tuple
// NOTE(review): CoMapFunction is not covered by the imports shown above —
// presumably org.apache.flink.streaming.api.functions.co.CoMapFunction is
// imported in the full project; confirm before copying this snippet.
val connectMapStream2: DataStream[(String, Long, Double)] = connectedStream.map(new CoMapFunction[SensorReading, (String, Long, Double), (String, Long, Double)] {
// invoked for elements of the first connected stream
override def map1(value: SensorReading): (String, Long, Double) = {
(value.id, value.timestamp, value.temperature)
}
// invoked for elements of the second connected stream
override def map2(value: (String, Long, Double)): (String, Long, Double) = {
(value._1, value._2, value._3)
}
})
connectMapStream2.print("connectMap")
// todo return a SensorReading object
val connectMapStream3: DataStream[SensorReading] = connectedStream.map(new CoMapFunction[SensorReading, (String, Long, Double), SensorReading] {
// first stream already carries SensorReading; rebuild it field by field
override def map1(value: SensorReading): SensorReading = {
SensorReading(value.id, value.timestamp, value.temperature)
}
// second stream carries tuples; convert each into a SensorReading
override def map2(value: (String, Long, Double)): SensorReading = {
SensorReading(value._1, value._2, value._3)
}
})
connectMapStream3.print()
env.execute()
}
}