大部分 DataStream API 算子的输出是单一输出,也就是某一种数据类型的流。split 算子是个例外,它可以将一条流分成多条流,但这些流的数据类型仍然相同。process function 的 side outputs 功能可以产生多条流,并且这些流的数据类型可以各不相同。一个 side output 可以定义为 OutputTag[X] 对象,其中 X 是该输出流的数据类型。process function 可以通过 Context 对象将一个事件发射到一个或者多个 side outputs。
案例:
package flink.chapter6ProcessFunction
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.scala.{OutputTag, StreamExecutionEnvironment}
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.util.Collector
import org.apache.flink.streaming.api.scala._
/**
 * Demo job for process-function side outputs.
 *
 * Reads tab-separated text lines from a socket, turns each well-formed line into a
 * `(String, Long, Double)` record, assigns event-time timestamps and watermarks, and
 * pushes the records through [[MySideOutputFun]]. The main output and the two side
 * outputs (ids `"one"` and `"tow"`) are printed.
 */
object Demo3 {

  /**
   * Parses one input line of the form `field0<TAB>field1<TAB>field2`.
   *
   * Each field is trimmed; the second must parse as a `Long` and the third as a
   * `Double`. Fields beyond the third are ignored.
   *
   * @param line raw text line read from the socket
   * @return the parsed record, or `None` when the line has fewer than three fields or
   *         a numeric field cannot be parsed
   */
  private def parseLine(line: String): Option[(String, Long, Double)] =
    line.split("\t") match {
      case Array(first, second, third, _*) =>
        // Try absorbs the NumberFormatException raised by toLong / toDouble.
        scala.util.Try((first.trim, second.trim.toLong, third.trim.toDouble)).toOption
      case _ =>
        None
    }

  /**
   * Builds and runs the streaming job.
   *
   * @param args command-line arguments (unused)
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    env.setParallelism(1)

    val dataStream = env.socketTextStream("hadoop101", 9999)

    val texStream = dataStream
      // Blank or malformed lines are dropped here instead of throwing inside the
      // operator, which would otherwise fail the whole job on a single bad line.
      .flatMap((line: String) => parseLine(line).toList)
      .assignTimestampsAndWatermarks(
        // Tolerates records arriving up to 2 seconds out of order.
        new BoundedOutOfOrdernessTimestampExtractor[(String, Long, Double)](Time.seconds(2)) {
          // NOTE(review): presumably the second field is in seconds and is scaled to
          // milliseconds here -- confirm against the data producer.
          override def extractTimestamp(t: (String, Long, Double)): Long = t._2 * 1000
        }
      )
      .process(new MySideOutputFun)

    // Main output: records that MySideOutputFun did not divert to a side output.
    texStream.print("texStream::::").setParallelism(1)

    // Side outputs are looked up by tag id; the ids must match the tags declared in
    // MySideOutputFun exactly (including the "tow" spelling).
    texStream.getSideOutput(new OutputTag[String]("one")).print()
    texStream.getSideOutput(new OutputTag[String]("tow")).print()

    env.execute("Demo3")
  }
}
/**
 * Routes each record by its third field (`_3`):
 *
 *  - `_3 < 0`        -> side output `one`, as the string `" le one = <value>"`
 *  - `0 <= _3 < 32`  -> side output `tow`, as the string `" le tow = <value>"`
 *  - anything else   -> forwarded unchanged on the main output
 *
 * A value of exactly `0` is sent to `tow`; previously it slipped between the two
 * ranges and landed on the main output together with values `>= 32`.
 */
class MySideOutputFun extends ProcessFunction[(String, Long, Double), (String, Long, Double)] {

  // Side-output tags. Consumers retrieve the side streams by these ids, so the id
  // strings must stay exactly as they are.
  lazy val one: OutputTag[String] = new OutputTag[String]("one")
  lazy val tow: OutputTag[String] = new OutputTag[String]("tow")

  /**
   * Dispatches a single record to a side output or to the main output.
   *
   * @param i         incoming record
   * @param context   process-function context used to emit to side outputs
   * @param collector collector for the main output
   */
  override def processElement(
      i: (String, Long, Double),
      context: ProcessFunction[(String, Long, Double), (String, Long, Double)]#Context,
      collector: Collector[(String, Long, Double)]): Unit =
    i._3 match {
      case v if v < 0 =>
        context.output(one, s" le one = $v")
      // Negative values were handled above, so this guard covers [0, 32).
      case v if v < 32 =>
        context.output(tow, s" le tow = $v")
      // Values >= 32 (and NaN, for which both comparisons are false) stay on the
      // main output.
      case _ =>
        collector.collect(i)
    }
}
测试结果:如下图