flink中没有明确的算子类别划分，所有的数据转换操作都属于transformation算子。
3:transformations
addSource
keyBy[DataStream —>keyedStream]
用key对数据进行重新分区
reduce[keyedStream —>DataStream]只接受keyedStream类型的数据,对相同的key进行数据的独立计算
Aggregations[keyedStream —>DataStream] 有sum算子,min,max, minBy ,maxBy
map
flatmap
union 连接相同类型格式的数据,可以连接多个数据集
connect
它只可以连接两种类型的数据集，可以连接不同类型格式的数据。连接后的数据不能直接print，因为它的类型是ConnectedStreams，要使用其他的算子（如map）使它转换为DataStream。
split与Select来划分数据集
split 对数据集进行打标签
真正划分的是select,select根据标签来进行划分数据集(分区)
filter过滤
设置条件对数据进行过滤，不满足条件的数据会被过滤掉。
package cn.dss.Fiink_DataStream.Transformations
import org.apache.flink.api.java.tuple.Tuple
import org.apache.flink.streaming.api.scala.{ConnectedStreams, DataStream, KeyedStream, SplitStream, StreamExecutionEnvironment}
/**
*
* @author: 董世森
* @version 1.0 2020/7/17
*
*/
object keyBy_ReduceDemo {

  /**
   * Demonstrates core Flink DataStream transformation operators:
   * union, keyBy, reduce, filter, the field aggregations
   * (sum / min / max / minBy / maxBy), connect (ConnectedStreams),
   * and split/select stream partitioning.
   *
   * Most `print()` calls are intentionally commented out so that only one
   * demo section produces output per run; uncomment the one you want to see.
   */
  def main(args: Array[String]): Unit = {
    val environment: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    import org.apache.flink.api.scala._
    // Parallelism 1 keeps printed output deterministic and easy to read.
    environment.setParallelism(1)

    // Sample data sets (dataStream is spare sample data, currently unused).
    val dataStream = environment.fromElements(("a", 3), ("d", 4), ("c", 2), ("c", 5), ("a", 5))
    val arr1: DataStream[(String, Int)] = environment.fromElements(("a", 1), ("b", 2), ("c", 3))
    val arr2: DataStream[(String, Int)] = environment.fromElements(("b", 1), ("b", 2), ("d", 3))
    val intarr: DataStream[Int] = environment.fromElements(26, 34, 3, 13)

    // union: both streams must have the same element type.
    val arrdata: DataStream[(String, Int)] = arr1.union(arr2)
    // arrdata.print()

    // keyBy: repartitions the stream by the tuple field at position 0
    // (DataStream -> KeyedStream).
    val keydatas: KeyedStream[(String, Int), Tuple] = arrdata.keyBy(0)
    // val astr: KeyedStream[(String, Int), Tuple] = arr1.keyBy("a")
    // keydatas.print()

    // reduce: only available on a KeyedStream; folds values per key
    // (KeyedStream -> DataStream).
    val data: DataStream[(String, Int)] = keydatas.reduce((t1, t2) => {
      // val, not var: sum is never reassigned.
      val sum = t1._2 + t2._2
      (t1._1, sum)
    })
    // data.print()

    print("******filter******")
    // filter: keeps only elements matching the predicate.
    val filterdata: DataStream[Int] = intarr.filter(_ > 20)
    // filterdata.print()
    // print("******Afilter******")
    val alist: DataStream[(String, Int)] = arrdata.filter(_._1.equals("a"))
    // alist.print()

    // Aggregations [KeyedStream -> DataStream]: sum, min, max, minBy, maxBy.
    val sumdata: DataStream[(String, Int)] = arr1.keyBy(0).sum(1)
    // FIX: aggregate the value at position 1. The original used max(0), which
    // aggregated the key field itself — inconsistent with sum(1)/min(1)/minBy(1)/
    // maxBy(1) below, and meaningless since the key is constant per partition.
    val max: DataStream[(String, Int)] = arr1.keyBy(0).max(1)
    val min: DataStream[(String, Int)] = arr1.keyBy(0).min(1)
    val minByValue: DataStream[(String, Int)] = arr1.keyBy(0).minBy(1)
    val maxByValue: DataStream[(String, Int)] = arr1.keyBy(0).maxBy(1)
    // sumdata.print()
    // max.print()
    // min.print()
    // maxByValue.print()
    // minByValue.print()

    // connect: joins two streams of DIFFERENT element types into a
    // ConnectedStreams; map takes one function per input side to bring
    // both to a common output type before further processing or printing.
    val connectarr: ConnectedStreams[(String, Int), Int] = arr1.connect(intarr)
    val result: DataStream[(Any, Any)] = connectarr.map(
      t1 => {
        (t1._1, t1._2)
      },
      t2 => {
        (t2, 0)
      }
    )
    // result.print()

    // split tags each element with one or more labels; select extracts the
    // sub-stream(s) carrying a given label.
    // NOTE(review): split/select is deprecated in newer Flink versions in
    // favor of side outputs — fine for this legacy demo API.
    val num: SplitStream[Int] = intarr.split(t => if (t % 2 == 0) Seq("even") else Seq("odd"))
    val evennum: DataStream[Int] = num.select("even")
    val oddnum: DataStream[Int] = num.select("odd")
    // evennum.print()
    // oddnum.print()
    val arr: SplitStream[(String, Int)] = arrdata.split(t => {
      if (t._2 % 2 == 0) Seq("even") else Seq("odd")
    })
    // Selecting both labels yields the full stream back.
    val selectv: DataStream[(String, Int)] = arr.select("even", "odd")
    selectv.print()

    // Lazily-built job graph only runs when execute() is called.
    environment.execute("testKeyreduce")
  }
}