import com.qu.source.SensorReading
import org.apache.flink.streaming.api.scala.{ConnectedStreams, DataStream, KeyedStream, SplitStream, StreamExecutionEnvironment}
import org.apache.flink.api.scala._
/**
 * Demonstrates common Flink DataStream transformation operators:
 * basic transforms (map / flatMap / filter), keyed rolling aggregations
 * (keyBy / sum / max / min / reduce), stream splitting (split / select),
 * and stream merging (connect + coMap, union).
 */
object TransFormTest {
  def main(args: Array[String]): Unit = {
    // Streaming execution environment.
    val env = StreamExecutionEnvironment.getExecutionEnvironment

    // Source 1: an in-memory collection of sensor readings.
    val stream1 = env.fromCollection(List(
      SensorReading("1", 1111111111, 21.11),
      SensorReading("2", 1111111112, 21.22),
      SensorReading("1", 1111111112, 21.22)
    ))

    // --- Part 1: basic transformation operators ---

    // Operator 1: map — one-to-one transformation of each element.
    val map = stream1.map(t => (t.id, t.timestamps, t.temperature * 2))
    map.print("map transform:")

    // Operator 2: flatMap, usage 1 — explode each element into several elements.
    // NOTE(review): List(t.id, t.timestamps, t.temperature) infers List[Any];
    // the following toString.toDouble only works because every field's string
    // form happens to parse as a Double. Kept as-is for tutorial parity.
    val flapMap = stream1.flatMap(t => List(t.id, t.timestamps, t.temperature))
      .map(t => (t, t.toString.toDouble * 2))
    flapMap.print("flapMap transform:")
    // flatMap usage 2: flatten nested collections, e.g. word count:
    // val flapMap1 = List("a b c d", "b c f e").flatMap(_.split(" "))
    // print(flapMap1)

    // Operator 3: filter — keep only elements for which the predicate is true.
    // (The second clause is redundant: when id == "1", id.toInt % 2 != 0 always holds.)
    val fil = stream1.filter(t => t.id == "1" && t.id.toInt % 2 != 0)
    fil.print("fliter transform:")

    // Source 2: read a local text file (CSV lines: id,timestamp,temperature).
    val inputPath = "/Users/xietong/IdeaProjects/FlinkTutorial/src/main/resources/1.txt"
    val inputDataSet = env.readTextFile(inputPath)

    // Operator 4: keyBy — DataStream -> KeyedStream. Logically partitions
    // (groups) the stream by key; the elements themselves are unchanged,
    // but keyed aggregations become available.
    val sensors: DataStream[SensorReading] = inputDataSet.map { line =>
      // Split each CSV line once instead of three times.
      val fields = line.split(",")
      SensorReading(fields(0), fields(1).toLong, fields(2).toDouble)
    }
    val keyBy: KeyedStream[SensorReading, String] = sensors.keyBy(_.id)
    keyBy.print("keyBy transform:")

    // --- Part 2: simple aggregation operators ---

    // Operator 5: rolling aggregations (sum / max / min) on a keyed stream.
    // Field index 2 refers to the `temperature` field of SensorReading.
    val sum: DataStream[SensorReading] = keyBy.sum(2)
    sum.print("sum transform:").setParallelism(1)
    val max: DataStream[SensorReading] = keyBy.max(2)
    max.print("max transform:").setParallelism(1)
    val min: DataStream[SensorReading] = keyBy.min(2)
    min.print("min transform:").setParallelism(1)

    // Operator 6: reduce — flexible aggregation: per key, previous timestamp + 1
    // and latest temperature + 10.
    val reduce: DataStream[SensorReading] =
      keyBy.reduce((x, y) => SensorReading(x.id, x.timestamps + 1, y.temperature + 10))
    reduce.print("reduce transform:").setParallelism(1)

    // --- Part 3: multi-stream operators: split / select ---
    // Operator 7: split — tags a DataStream into a SplitStream of named sub-streams.
    // Operator 8: select — pulls the tagged sub-streams back out as DataStreams;
    // only the two together recover concrete DataStreams.
    // NOTE(review): split/select is deprecated in newer Flink releases; prefer
    // side outputs (ProcessFunction + OutputTag) when upgrading.
    val splitStream: SplitStream[SensorReading] = sensors.split(t => {
      if (t.temperature > 30) Seq("high") else Seq("low")
    })
    val high: DataStream[SensorReading] = splitStream.select("high")
    val low: DataStream[SensorReading] = splitStream.select("low")
    val all: DataStream[SensorReading] = splitStream.select("high", "low")
    high.print("high transform")
    low.print("low transform")
    all.print("all transform")

    // --- Part 4: multi-stream operators: connect ---
    // Operator 9: connect — joins two DataStreams into one ConnectedStreams,
    // but each side keeps its own element type (an advantage over union).
    // Operator 10: coMap/coFlatMap — apply a separate map/flatMap to each side,
    // re-merging into a single DataStream; output types per side may also differ.
    val data1: DataStream[(String, Double)] = high.map(t => (t.id, t.temperature))
    val data2: DataStream[SensorReading] = low
    val connectedStreams: ConnectedStreams[(String, Double), SensorReading] = data1.connect(data2)
    val coMap = connectedStreams.map(t1 => (t1._1, t1._2, "high"), t2 => (t2.id, t2.temperature))
    coMap.print("coMap transform")

    // --- Part 5: multi-stream operators: union ---
    // Operator 11: union — merges two or more DataStreams of the SAME element type.
    val union: DataStream[SensorReading] = high.union(low)
    union.print("union transform")

    // Trigger execution; everything above only builds the dataflow graph.
    env.execute("transform test")
  }
}
// Flink学习:常用transform转换算子API (Flink study notes: common transform operator APIs)
// Originally published 2023-03-06 15:56:12