Flink开发五步之第三步:Transform算子汇总(直接动手操练吧)

map

package com.third_transform
import org.apache.flink.streaming.api.scala._

object Transform_Map {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Source: a fixed collection of space-separated word pairs.
    val lines: Array[String] = Array("hello flink","hello world1","hello world1","hello world2")
    val source: DataStream[String] = env.fromCollection(lines)

    // map: exactly one output element per input element.
    // Joins the two words of each line with "__".
    val joined: DataStream[String] = source.map { line =>
      val parts = line.split(" ")
      s"${parts(0)}__${parts(1)}"
    }
    joined.print("stream")
    env.execute()
  }
}

flatmap

package com.third_transform

import org.apache.flink.streaming.api.scala._

object Transform_FlatMap {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Source: a fixed collection of space-separated word pairs.
    val lines: Array[String] = Array("hello flink","hello world1","hello world1","hello world2")
    val source: DataStream[String] = env.fromCollection(lines)

    // flatMap: zero or more output elements per input element.
    // Splitting on spaces turns each line into a stream of individual words.
    val words: DataStream[String] = source.flatMap(_.split(" "))
    words.print("stream")
    env.execute()
  }
}

filter

package com.third_transform

import org.apache.flink.streaming.api.scala._

object Transform_Filter {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    // Source: a fixed collection of space-separated word pairs.
    val lines: Array[String] = Array("hello flink","hello world1","hello world1","hello world2")
    val source: DataStream[String] = env.fromCollection(lines)

    // Split each line into words, then keep only elements for which the
    // predicate is true: everything except the literal word "hello".
    val kept: DataStream[String] = source
      .flatMap(_.split(" "))
      .filter(word => word != "hello")

    kept.print("stream")
    env.execute()
  }
}

keyby + reduce

package com.third_transform

import org.apache.flink.api.java.tuple.Tuple
import org.apache.flink.streaming.api.scala._

object Transform_KeyBy {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    val lines: Array[String] = Array("hello1 flink","hello1 world1","hello2 world1","hello3 world2")
    val source: DataStream[String] = env.fromCollection(lines)

    // DataStream ===>>> KeyedStream: partition by the first word (tuple field 0).
    val keyed: KeyedStream[(String, String), Tuple] = source
      .map { line =>
        val parts = line.split(" ")
        (parts(0), parts(1))
      }
      .keyBy(0)

    // KeyedStream ===>>> DataStream.
    // reduce is a rolling aggregation on each key group: it merges the current
    // element with the previous aggregate and emits the result of EVERY step,
    // not just the final aggregate of the group.
    val reduced: DataStream[(String, String)] =
      keyed.reduce((acc, cur) => (acc._1 + "---" + cur._1, acc._2 + "---" + cur._2))

    reduced.print("stream")
    env.execute()
  }
}

滚动聚合算子(rolling Aggregation)

sum()

min()

max()

minBy()

maxBy()

package com.third_transform

import org.apache.flink.api.java.tuple.Tuple
import org.apache.flink.streaming.api.scala._

object Transform_RollingAggregation {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    val lines: Array[String] = Array("hello 1","hello 2","world 2","world 3")
    val source: DataStream[String] = env.fromCollection(lines)

    // Key by the word (field 0); field 1 is the numeric value being aggregated.
    val keyed: KeyedStream[(String, Int), Tuple] = source
      .map { line =>
        val parts = line.split(" ")
        (parts(0), parts(1).toInt)
      }
      .keyBy(0)

    // Rolling aggregations emit an updated value for every incoming element.
    val summed: DataStream[(String, Int)]  = keyed.sum(1)
    val mins: DataStream[(String, Int)]    = keyed.min(1)
    val maxs: DataStream[(String, Int)]    = keyed.max(1)
    val minRows: DataStream[(String, Int)] = keyed.minBy(1)
    val maxRows: DataStream[(String, Int)] = keyed.maxBy(1)
    summed.print("stream1")
    mins.print("stream2")
    maxs.print("stream3")
    minRows.print("stream4")
    maxRows.print("stream5")
    env.execute()
  }
}

split和select

DataStream → SplitStream:根据某些特征把一个 DataStream 拆分成两个或者多个 DataStream。
SplitStream → DataStream:从一个 SplitStream 中获取一个或者多个 DataStream。

package com.third_transform

import org.apache.flink.streaming.api.scala._

object Transform_SplitAndSelect {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    val pairs: Array[(String, Int)] = Array(("hello1", 1),("hello2", 2), ("hello2", 3), ("hello3", 4))
    val source: DataStream[(String, Int)] = env.fromCollection(pairs)

    // split tags every element with one or more output names; values above 2
    // go to "big", the rest to "small".
    // NOTE: split/select is deprecated in newer Flink versions (side outputs
    // are the replacement), kept here to match this example's API level.
    val tagged: SplitStream[(String, Int)] = source.split { pair =>
      if (pair._2 > 2) Seq("big") else Seq("small")
    }

    // select extracts one or more tagged sub-streams back out as DataStreams.
    val big: DataStream[(String, Int)] = tagged.select("big")
    val small: DataStream[(String, Int)] = tagged.select("small")
    val all: DataStream[(String, Int)] = tagged.select("big", "small")
    big.print("bigDS")
    small.print("smallDS")
    all.print("allDS")
    env.execute()
  }
}

connect和comap

package com.third_transform

import org.apache.flink.streaming.api.scala._

object Transform_ConnectAndComap {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    val left: Array[(String, Int)] = Array(("hello1", 1),("hello2", 2), ("hello2", 3), ("hello3", 4))
    val leftDS: DataStream[(String, Int)] = env.fromCollection(left)
    val right: Array[(Int, String)] = Array((1,"hello"), (2,"hello"), (3,"hello"))
    val rightDS: DataStream[(Int, String)] = env.fromCollection(right)

    // connect pairs the two streams while keeping their element types separate.
    val connected: ConnectedStreams[(String, Int), (Int, String)] = leftDS.connect(rightDS)

    // CoMap: one map function per input stream; here the results are unified
    // into a common (Any, Any) output type.
    val merged: DataStream[(Any, Any)] = connected.map(
      a => (a._1, a._2 - 5),
      b => (b._1 + 5, b._2)
    )

    merged.print("stream")
    env.execute()
  }
}

union

package com.third_transform

import org.apache.flink.streaming.api.scala._

object Transform_Union {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    val first: Array[(String, Int)] = Array(("hello1", 1),("hello2", 2), ("hello2", 3), ("hello3", 4))
    val firstDS: DataStream[(String, Int)] = env.fromCollection(first)
    val second: Array[(String, Int)] = Array(("hello2", 2), ("hello2", 3), ("hello3", 4))
    val secondDS: DataStream[(String, Int)] = env.fromCollection(second)
    // union merges two (or more) streams of the SAME element type into one.
    val merged: DataStream[(String, Int)] = firstDS.union(secondDS)
    merged.print("stream")
    env.execute()
  }
}

待续

split 和 select 的新写法(说明:新版本 Flink 中 split/select 已被废弃,推荐使用侧输出流 Side Output 配合 ProcessFunction 实现分流)

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值