Flink Learning: Stream Splitting and Merging

Contents

1. Splitting a stream: side outputs

(1) Side outputs carrying the full record

(2) Changing the structure of the side output records

2. Merging streams


1. Splitting a stream: side outputs

(1) Side outputs carrying the full record
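
Both examples below import SensorReading from the source package, but the case class itself is never shown in this post. A minimal definition consistent with how it is used here (an assumption, not the original code) would be:

// Assumed definition of source.SensorReading, inferred from its usage below
case class SensorReading(id: String, timestamp: Long, temperature: Double)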

import org.apache.flink.streaming.api.scala._
import source.SensorReading
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.util.Collector

/**
 * Side output example
 */
object SideOutPutTest {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    val path = "D:\\javaseprojects\\flinkstu\\resources\\sensor.txt"
    val inputStream: DataStream[String] = env.readTextFile(path)

    val dataStream: DataStream[SensorReading] = inputStream.map(data => {
      val arr: Array[String] = data.split(",")
      SensorReading(arr(0).trim, arr(1).trim.toLong, arr(2).trim.toDouble)
    })
    
    val tempStream: DataStream[SensorReading] = dataStream.process(new SplitTempProcessor2)
    tempStream.print("normal")
    val tempStream1: DataStream[(String, Long, Double)] = tempStream.getSideOutput(new OutputTag[(String, Long, Double)]("low"))
    tempStream1.print("low")
    val tempStream2: DataStream[(String, Long, Double)] = tempStream.getSideOutput(new OutputTag[(String, Long, Double)]("high"))
    tempStream2.print("high")

    env.execute("split test")
  }
}

// Above 37 degrees: high; below 35: low; in between: normal
class SplitTempProcessor2 extends ProcessFunction[SensorReading, SensorReading] {
  override def processElement(value: SensorReading,
                              ctx: ProcessFunction[SensorReading, SensorReading]#Context,
                              out: Collector[SensorReading]): Unit = {
    if (value.temperature >= 35 && value.temperature < 37) {
      out.collect(value)
    } else if (value.temperature < 35) {
      ctx.output(new OutputTag[(String, Long, Double)]("low"), (value.id, value.timestamp, value.temperature))
    } else {
      ctx.output(new OutputTag[(String, Long, Double)]("high"), (value.id, value.timestamp, value.temperature))
    }
  }
}

Output:

high> (sensor_1,1684201947,39.8)
low> (sensor_4,1684202000,17.7)
low> (sensor_3,1684202064,27.3)
low> (sensor_7,1684202068,13.8)
normal> SensorReading(sensor_1,1684201910,36.8)
high> (sensor_1,1684202012,44.7)
high> (sensor_1,1684201973,38.16)
high> (sensor_1,1684201973,38.16)

Notes:

        1. A stream can have one or more side output streams.

        2. The OutputTag used in the custom ProcessFunction and the one passed to tempStream.getSideOutput, e.g. new OutputTag[(String, Long, Double)]("low"), must be identical (same id and same type); if the ids differ, nothing is printed for that side output (see the sketch below).
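
A minimal sketch of point 2, assuming the tags are pulled out into a shared object so the ProcessFunction and getSideOutput can never disagree on the id or the element type (the object and value names are illustrative, not from the original post):

import org.apache.flink.streaming.api.scala._

// Define each tag once and reference the same value from both sides
object SideOutputTags {
  val lowTag: OutputTag[(String, Long, Double)] = new OutputTag[(String, Long, Double)]("low")
  val highTag: OutputTag[(String, Long, Double)] = new OutputTag[(String, Long, Double)]("high")
}

// Inside the ProcessFunction:
//   ctx.output(SideOutputTags.lowTag, (value.id, value.timestamp, value.temperature))
// In the driver:
//   val lowStream = tempStream.getSideOutput(SideOutputTags.lowTag)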

(2) Changing the structure of the side output records

import source.SensorReading
import org.apache.flink.streaming.api.functions.ProcessFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}
import org.apache.flink.util.Collector

object TransformTest {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // TODO load the data source
    val path = "D:\\javaseprojects\\flinkstu\\resources\\sensor.txt"
    val inputStream: DataStream[String] = env.readTextFile(path)

    // TODO transform the data
    val dataStream: DataStream[SensorReading] = inputStream.map(data => {
      val arr: Array[String] = data.split(",")
      SensorReading(arr(0).trim, arr(1).trim.toLong, arr(2).trim.toDouble)
    })

    
    // TODO main stream
    val tempStream: DataStream[SensorReading] = dataStream.process(new MySplitFunction)
    tempStream.print("正常")

    // TODO print the side output streams
    val tempStream2: DataStream[(Long, Double)] = tempStream.getSideOutput(new OutputTag[(Long, Double)]("低烧"))
    tempStream2.print("低烧")

    val tempStream3: DataStream[Double] = tempStream.getSideOutput(new OutputTag[Double]("高烧"))
    tempStream3.print("高烧")

    env.execute()
  }
}

// 35 to 37: normal; below 35: low; above 37: high
class MySplitFunction extends ProcessFunction[SensorReading, SensorReading] {
  override def processElement(value: SensorReading,
                              ctx: ProcessFunction[SensorReading, SensorReading]#Context,
                              out: Collector[SensorReading]): Unit = {
    if (value.temperature >= 35 && value.temperature <= 37) {
      out.collect(value) // normal
    } else if (value.temperature < 35) {
      ctx.output(new OutputTag[(Long, Double)]("低烧"), (value.timestamp, value.temperature))
    } else {
      ctx.output(new OutputTag[Double]("高烧"), value.temperature)
    }
  }
}

Output:

高烧> 39.8
低烧> (1684202000,17.7)
低烧> (1684202064,27.3)
低烧> (1684202068,13.8)
正常> SensorReading(sensor_1,1684201910,36.8)
高烧> 44.7
高烧> 38.16
正常> SensorReading(sensor_1,1684201973,36.16)

2. Merging streams

import source.SensorReading
import org.apache.flink.streaming.api.functions.co.CoMapFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.{DataStream, StreamExecutionEnvironment}

object TransformTest {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // tempStream, tempStream2 and tempStream3 are the main stream and the "low"/"high"
    // side output streams, built exactly as in SideOutPutTest above:
    //   tempStream : DataStream[SensorReading]
    //   tempStream2: DataStream[(String, Long, Double)]  (side output "low")
    //   tempStream3: DataStream[(String, Long, Double)]  (side output "high")

    // TODO Merging streams
    // TODO Method 1: when the streams to merge have the same element type, use union
    val unionStream: DataStream[(String, Long, Double)] = tempStream2.union(tempStream3)
    unionStream.print("不正常")

Output:

不正常> (sensor_4,1684202000,17.7)
不正常> (sensor_3,1684202064,27.3)
不正常> (sensor_7,1684202068,13.8)
不正常> (sensor_1,1684201947,39.8)
不正常> (sensor_1,1684202012,44.7)
不正常> (sensor_1,1684201973,38.16)
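
union is not limited to two inputs: the Scala API accepts any number of streams in one call, as long as they all share the same element type. A standalone sketch (object name and sample values are illustrative, not from the original post):

import org.apache.flink.streaming.api.scala._

object UnionSketch {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // Three streams with the same element type (String, Long, Double)
    val low1 = env.fromElements(("sensor_4", 1684202000L, 17.7))
    val low2 = env.fromElements(("sensor_7", 1684202068L, 13.8))
    val high = env.fromElements(("sensor_1", 1684201947L, 39.8))

    // union is variadic: all three streams are merged in a single call
    val abnormal: DataStream[(String, Long, Double)] = low1.union(low2, high)
    abnormal.print("union")

    env.execute("union sketch")
  }
}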

    // TODO Method 2: when the streams to merge have different element types, use connect
    val connectedStream: ConnectedStreams[SensorReading, (String, Long, Double)] = tempStream.connect(tempStream2)

    val connectMapStream: DataStream[(String, Long, Double)] = connectedStream.map(
      data1 => {
        (data1.id, data1.timestamp, data1.temperature)
      },
      data2 => {
        (data2._1, data2._2, data2._3)
      }
    )
    connectMapStream.print()

Output:

(sensor_1,1684201910,36.8)
(sensor_4,1684202000,17.7)
(sensor_1,1684201973,36.16)
(sensor_3,1684202064,27.3)
(sensor_7,1684202068,13.8)

    val connectedStream1: ConnectedStreams[SensorReading, (String, Long, Double)] = tempStream.connect(tempStream3)
    val connectMapStream1: DataStream[(String, Long, Double)] = connectedStream1.map(
      data1 => {
        (data1.id, data1.timestamp, data1.temperature)
      },
      data2 => {
        (data2._1, data2._2, data2._3)
      }
    )
    connectMapStream1.print()

Output:

(sensor_1,1684201910,36.8)
(sensor_1,1684201947,39.8)
(sensor_1,1684201973,36.16)
(sensor_1,1684202012,44.7)
(sensor_1,1684201973,38.16)

Instead of passing two lambdas to map, the same connect-based merge can also be written with an explicit CoMapFunction:
    // TODO return a (String, Long, Double) tuple
    val connectMapStream2: DataStream[(String, Long, Double)] = connectedStream.map(new CoMapFunction[SensorReading, (String, Long, Double), (String, Long, Double)] {
      override def map1(value: SensorReading): (String, Long, Double) = {
        (value.id, value.timestamp, value.temperature)
      }
      override def map2(value: (String, Long, Double)): (String, Long, Double) = {
        (value._1, value._2, value._3)
      }
    })
    connectMapStream2.print("connectMap")

    // TODO return a SensorReading object
    val connectMapStream3: DataStream[SensorReading] = connectedStream.map(new CoMapFunction[SensorReading, (String, Long, Double), SensorReading] {
      override def map1(value: SensorReading): SensorReading = {
        SensorReading(value.id, value.timestamp, value.temperature)
      }
      override def map2(value: (String, Long, Double)): SensorReading = {
        SensorReading(value._1, value._2, value._3)
      }
    })
    connectMapStream3.print()

    env.execute()
  }
}
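
The connect examples above rely on streams defined elsewhere, so here is a standalone, runnable sketch of connect plus CoMapFunction merging two streams of different element types into one (all names and sample values are illustrative, not from the original post):

import org.apache.flink.streaming.api.functions.co.CoMapFunction
import org.apache.flink.streaming.api.scala._

object ConnectSketch {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)

    // Two streams with different element types
    val readings: DataStream[(String, Double)] = env.fromElements(("sensor_1", 36.8), ("sensor_4", 17.7))
    val warnings: DataStream[String] = env.fromElements("sensor_4 is abnormal")

    // connect keeps both element types; the CoMapFunction maps each side to a common type
    val merged: DataStream[String] = readings
      .connect(warnings)
      .map(new CoMapFunction[(String, Double), String, String] {
        override def map1(value: (String, Double)): String = s"reading: ${value._1} -> ${value._2}"
        override def map2(value: String): String = s"warning: $value"
      })

    merged.print()
    env.execute("connect sketch")
  }
}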
