File: sensor.txt (sample input, one reading per line: id,timestamp,temperature)
sensor_1,1547718199,35.8
sensor_6,1547718201,15.4
sensor_7,1547718202,6.7
sensor_10,1547718205,38.1
sensor_1,1547718129,29.8
sensor_1,1547718158,5.8
sensor_1,1547718140,40.8
sensor_1,1547718111,11.8
package com.apitest
import org.apache.flink.api.common.functions.{FilterFunction, ReduceFunction, RichMapFunction}
import org.apache.flink.configuration.Configuration
import org.apache.flink.streaming.api.scala._
//定义样例类,温度传感器
case class SensorReading2(id:String,timestamp:Long,temperature:Double)
/**
 * Walk-through of basic Flink DataStream transformations applied to sensor readings
 * read from a text file: map, keyed aggregation, reduce, split/select, connect/coMap,
 * union and a custom filter.
 *
 * All `print` sinks are left commented out; enable the ones of interest to see the
 * output of a given step.
 */
object TransformTest {

  /** Input file used when no path is supplied on the command line. */
  private val DefaultInputPath =
    "D:\\workspace\\ideastudy\\flinkstudy\\src\\main\\scala\\com\\apitest\\sensor.txt"

  /**
   * Builds the demo pipeline and submits it under the job name "transform test".
   *
   * @param args optional command-line arguments; when present, `args(0)` is used as the
   *             input file path instead of the built-in default path
   */
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Parallelism 1 so that records keep the order in which they are read (for testing).
    env.setParallelism(1)

    // Allow the input location to be overridden instead of relying on one machine's path.
    val inputPath = args.headOption.getOrElse(DefaultInputPath)
    val inputStream = env.readTextFile(inputPath)

    // 1. Convert each comma-separated line into the case class.
    val dataStream = inputStream.map { data =>
      val arr = data.split(",")
      SensorReading2(arr(0), arr(1).toLong, arr(2).toDouble)
    }
    // dataStream.print()

    // 2. Keyed aggregation: output the current minimum temperature of each sensor.
    val aggStream = dataStream
      .keyBy(_.id) // group by sensor id, using a type-safe key selector
      .minBy("temperature")
    // aggStream.print()

    // 3. Output the current minimum temperature together with the latest timestamp.
    val resultStream = dataStream
      .keyBy(_.id)
      // Variant 1, an inline lambda:
      // .reduce((curState, newData) =>
      //   SensorReading2(curState.id, newData.timestamp, curState.temperature.min(newData.temperature))
      // )
      // Variant 2, a function class:
      .reduce(new MyReduceFunction)
    // resultStream.print()

    // 4. Multi-stream transformations.
    // 4.1 Split: tag each reading as "high" (temperature above 30.0) or "low".
    val splitStream = dataStream.split { data =>
      if (data.temperature > 30.0) Seq("high") else Seq("low")
    }
    val highTempStream = splitStream.select("high")
    val lowTempStream = splitStream.select("low")
    val allTempStream = splitStream.select("high", "low")
    // highTempStream.print("high")
    // lowTempStream.print("low")
    // allTempStream.print("all")

    // 4.2 Connect: the two connected streams may carry different element types.
    val warningStream = highTempStream.map(data => (data.id, data.temperature))
    val connectedStreams = warningStream.connect(lowTempStream)

    // coMap handles each side separately; the two result types may differ as well.
    val coMapResultStream = connectedStreams.map(
      warningData => (warningData._1, warningData._2, "warning"),
      lowTempData => (lowTempData.id, "healthy")
    )
    // coMapResultStream.print("coMap")

    // 4.3 Union: all unioned streams must have the same element type.
    val unionStream = highTempStream.union(lowTempStream)

    // 5. Custom filter implemented as a function class.
    val filterStream = dataStream.filter(new MyFilter)
    // filterStream.print()

    env.execute("transform test")
  }
}
/**
 * Combines two readings into one: the id comes from the first reading, the timestamp
 * from the second, and the temperature is the lower of the two.
 */
class MyReduceFunction extends ReduceFunction[SensorReading2] {

  override def reduce(value1: SensorReading2, value2: SensorReading2): SensorReading2 = {
    val lowestTemperature = math.min(value1.temperature, value2.temperature)
    SensorReading2(
      id = value1.id,
      timestamp = value2.timestamp,
      temperature = lowestTemperature
    )
  }
}
/**
 * Keeps only the readings whose sensor id begins with "sensor_1".
 *
 * NOTE(review): this is a prefix match, so ids such as "sensor_10" pass as well —
 * confirm that this is intended.
 */
class MyFilter extends FilterFunction[SensorReading2] {

  override def filter(t: SensorReading2): Boolean = {
    val sensorId = t.id
    sensorId.startsWith("sensor_1")
  }
}
/**
 * Example rich function: besides mapping, it can reach the runtime context and
 * provides lifecycle methods (`open` / `close`).
 */
class MyRichMapper extends RichMapFunction[SensorReading2, String] {

  /** Place for initialization work, for example opening a database connection. */
  override def open(parameters: Configuration): Unit = {
    // The runtime context is obtained here; this example does not use the result.
    getRuntimeContext()
  }

  /** Maps a reading to its sensor id followed by the literal text "temperature". */
  override def map(in: SensorReading2): String =
    s"${in.id}temperature"

  /** Place for clean-up work, for example closing a connection or clearing state. */
  override def close(): Unit = ()
}