1. 表
2. 更新模式
3. 输出到mysql
4. 动态表转换成DataStream
5. 时间特性
6. 窗口
6.1. 案例
package com.xiaofan.apitest.tabletest
import com.xiaofan.apitest.source.SensorReading
import org.apache.flink.streaming.api.TimeCharacteristic
import org.apache.flink.streaming.api.functions.timestamps.BoundedOutOfOrdernessTimestampExtractor
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.table.api._
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment
import org.apache.flink.types.Row
/**
 * Demonstrates event-time group windows and over windows on a sensor stream,
 * each written once with the Table API and once with SQL.
 *
 * Only the two tumbling-window results are printed; the two over-window tables
 * are built but never converted to a stream in this program.
 */
object TimeAndWindowTest {
  def main(args: Array[String]): Unit = {
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    // Run the job on event time.
    env.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    val tabEnv: StreamTableEnvironment = StreamTableEnvironment.create(env)

    val inputPath = "D:\\big-data\\code\\FlinkTutorial\\src\\main\\resources\\sensor.txt"
    val inputStream: DataStream[String] = env.readTextFile(inputPath)

    // Every line holds three comma-separated fields: a string, a long and a double.
    val dataStream: DataStream[SensorReading] = inputStream
      .map(data => {
        val arr: Array[String] = data.split(",")
        SensorReading(arr(0), arr(1).toLong, arr(2).toDouble)
      })
      .assignTimestampsAndWatermarks(
        new BoundedOutOfOrdernessTimestampExtractor[SensorReading](Time.milliseconds(3)) {
          // The raw timestamp is scaled by 1000 (presumably seconds -> milliseconds; confirm
          // against the input file).
          override def extractTimestamp(element: SensorReading): Long = element.timestamp * 1000L
        })

    // Expose the stream as a table; the event-time attribute is renamed to "ts".
    val sensorTable: Table =
      tabEnv.fromDataStream(dataStream, $"id", $"temperature", $"timestamp".rowtime() as "ts")
    // sensorTable.printSchema()
    // tabEnv.toAppendStream[Row](sensorTable).print()

    // 1. Group window
    // 1.1. Table API: one result row per sensor for every 10-second tumbling window.
    val resultTable: Table = sensorTable
      // Equivalent dot notation: .window(Tumble.over(10.seconds()).on($"ts").as($"tw"))
      .window(Tumble over 10.seconds on $"ts" as $"tw")
      .groupBy($"id", $"tw")
      .select($"id", $"id".count, $"temperature".avg, $"tw".end)

    // 1.2. SQL: the same aggregation expressed against the registered "sensor" view.
    tabEnv.createTemporaryView("sensor", sensorTable)
    val sqlResultTable: Table = tabEnv.sqlQuery(
      """
        |select
        | id,
        | count(id),
        | avg(temperature),
        | tumble_end(ts, interval '10' second)
        |from sensor
        |group by
        | id,
        | tumble(ts, interval '10' second)
        |""".stripMargin)
    // sqlResultTable.printSchema()

    // 2. Over window: for every reading of a sensor, aggregate it with the two preceding rows.
    // 2.1. Table API
    val overResultTable: Table = sensorTable
      .window(Over partitionBy $"id" orderBy $"ts" preceding 2.rows as $"ow")
      // The id column must be referenced as $"id"; a plain String argument would be
      // turned into a constant string literal instead of a column reference.
      .select($"id", $"ts", $"id".count over $"ow", $"temperature".avg over $"ow")

    // 2.2. SQL
    val sqlOverWindowResult: Table = tabEnv.sqlQuery(
      """
        |select
        | id,
        | ts,
        | count(id) over ow,
        | avg(temperature) over ow
        |from sensor
        |window ow as (
        | partition by id
        | order by ts
        | rows between 2 preceding and current row
        |)
        |""".stripMargin)

    // Print the Table API result as an append stream and the SQL result as a retract stream.
    tabEnv.toAppendStream[Row](resultTable).print("table")
    tabEnv.toRetractStream[Row](sqlResultTable).print("sql")

    env.execute("time and window test")
  }
}
7. 函数
7.1. 一对一,scalar函数
// Source table: id, temperature and the event-time attribute renamed to "ts".
val sensorTable: Table = tabEnv.fromDataStream(
  dataStream,
  $"id",
  $"temperature",
  $"timestamp".rowtime() as "ts"
)

// Apply the user-defined scalar function HashCode to the id column.
val hashCode = new HashCode(2)

// 1. Table API: the function instance is called directly on the column expression.
val resultTable: Table = sensorTable.select($"id", $"ts", hashCode($"id"))

// 2. SQL: register the table and the function under the names used in the query.
tabEnv.createTemporaryView("sensor", sensorTable)
tabEnv.createTemporaryFunction("hashCode", hashCode)
val resultSqlTable: Table =
  tabEnv.sqlQuery("select id, ts, hashCode(id) from sensor")
/**
 * User-defined scalar function: maps a string to an integer derived from its hash code.
 *
 * @param factor multiplier applied to the string's hash code before the offset is subtracted
 */
class HashCode(factor: Int) extends ScalarFunction {

  /**
   * Computes `s.hashCode * factor - 10000`.
   *
   * The method name must stay `eval`.
   */
  def eval(s: String): Int = {
    val scaled = s.hashCode * factor
    scaled - 10000
  }
}
- 一对多
// Source table: id, temperature and the event-time attribute renamed to "ts".
val sensorTable: Table = tabEnv.fromDataStream(
  dataStream,
  $"id",
  $"temperature",
  $"timestamp".rowtime() as "ts"
)

// Apply the user-defined table function Split, which breaks each id on "_".
val split = new Split("_")

// 1. Table API: lateral-join each row with the (word, length) rows produced from its id.
val resultTable: Table = sensorTable
  .joinLateral(split($"id").as("word", "length"))
  .select($"id", $"temperature", $"word", $"length")

// 2. SQL: register the table and the function, then join through `lateral table`.
tabEnv.createTemporaryView("sensor", sensorTable)
tabEnv.createTemporaryFunction("split", split)
val sqlResultTable: Table = tabEnv.sqlQuery(
  """
    |select
    | id, ts, word, length
    |from
    | sensor, lateral table( split(id) ) as splitid (word, length)
    |""".stripMargin)

// Only the SQL result is printed here.
tabEnv.toAppendStream[Row](sqlResultTable).print("sql ")
/**
 * User-defined table function: splits a string and emits one (word, word length)
 * row per piece.
 *
 * @param separator passed to `String.split`, so it is interpreted as a regular expression
 */
class Split(var separator: String) extends TableFunction[(String, Int)] {

  /** Emits a `(word, word.length)` tuple for every piece of `str`. */
  def eval(str: String): Unit = {
    for (word <- str.split(separator)) {
      collect((word, word.length))
    }
  }
}
- 多对一 聚合函数
// Source table: id, temperature and the event-time attribute renamed to "ts".
val sensorTable: Table = tabEnv.fromDataStream(
  dataStream,
  $"id",
  $"temperature",
  $"timestamp".rowtime() as "ts"
)

// Table API: average temperature per sensor using the user-defined aggregate function.
val avgTemp = new AvgTemp
tabEnv.createTemporaryFunction("avgTemp", avgTemp)
val resultTable: Table = sensorTable
  .groupBy($"id")
  .aggregate(avgTemp($"temperature").as("avgTemp"))
  .select($"id", $"avgTemp")

// SQL: the same aggregation through the function registered above.
tabEnv.createTemporaryView("sensor", sensorTable)
val sqlTableResult: Table = tabEnv.sqlQuery(
  """
    |select id, avgTemp(temperature)
    |from
    |sensor
    |group by id
    |""".stripMargin)

// The SQL result is printed as a retract stream under the prefix "table".
tabEnv.toRetractStream[Row](sqlTableResult).print("table")
/**
 * Mutable accumulator for an average: the running sum of the values and how many
 * values were added. Both fields start at zero.
 */
case class AvgTempAcc(
  var sum: Double = 0.0,
  var count: Int = 0
)
/**
 * User-defined aggregate function: average temperature per group.
 * The state is an [[AvgTempAcc]] holding the running sum and the number of readings.
 */
class AvgTemp extends AggregateFunction[Double, AvgTempAcc] {

  /** Starts every group from an accumulator with its default (zero) fields. */
  override def createAccumulator(): AvgTempAcc = new AvgTempAcc()

  /** Adds one temperature reading to the running sum and bumps the reading count. */
  def accumulate(accumulator: AvgTempAcc, temp: Double): Unit = {
    accumulator.count += 1
    accumulator.sum += temp
  }

  /** Returns the sum divided by the count. */
  override def getValue(accumulator: AvgTempAcc): Double = {
    val AvgTempAcc(total, readings) = accumulator
    total / readings
  }
}
- 多对多 – 表聚合函数
// Source table: id, temperature and the event-time attribute renamed to "ts".
val sensorTable: Table = tabEnv.fromDataStream(
  dataStream,
  $"id",
  $"temperature",
  $"timestamp".rowtime() as "ts"
)

// Table API: per sensor, flat-aggregate the temperatures into (temp, rank) rows
// with the user-defined table aggregate function.
val top2Temp = new Top2Temp
tabEnv.registerFunction("top2Temp", top2Temp)
val resultTable: Table = sensorTable
  .groupBy($"id")
  .flatAggregate(top2Temp($"temperature").as("temp", "rank"))
  .select($"id", $"temp", $"rank")

// The result is printed as a retract stream.
tabEnv.toRetractStream[Row](resultTable).print("table")

env.execute("function test")
/**
 * Mutable accumulator holding the highest and second-highest temperature seen so far.
 * Both slots start at `Double.MinValue`, which acts as the "no value yet" placeholder.
 */
case class Top2TempAcc(
  var highestTemp: Double = Double.MinValue,
  var secondHighestTemp: Double = Double.MinValue
)
/**
 * User-defined table aggregate function: keeps the two highest temperatures of a
 * group and emits them as `(temperature, rank)` rows, rank 1 being the highest.
 */
class Top2Temp extends TableAggregateFunction[(Double, Int), Top2TempAcc] {

  /** Starts every group with both slots set to the `Double.MinValue` placeholder. */
  override def createAccumulator(): Top2TempAcc = Top2TempAcc()

  /**
   * Folds one temperature into the accumulator.
   *
   * A new maximum pushes the previous maximum down to second place; otherwise the
   * value only replaces the second place if it is larger than it.
   */
  def accumulate(acc: Top2TempAcc, temp: Double): Unit = {
    if (temp > acc.highestTemp) {
      acc.secondHighestTemp = acc.highestTemp
      acc.highestTemp = temp
    } else if (temp > acc.secondHighestTemp) {
      acc.secondHighestTemp = temp
    }
  }

  /**
   * Emits the result rows for the accumulator. The method name must stay `emitValue`.
   *
   * A slot that still holds the `Double.MinValue` placeholder was never filled
   * (the group has fewer than two readings), so it is skipped instead of being
   * emitted as if it were a real temperature.
   */
  def emitValue(acc: Top2TempAcc, out: Collector[(Double, Int)]): Unit = {
    if (acc.highestTemp != Double.MinValue) {
      out.collect((acc.highestTemp, 1))
    }
    if (acc.secondHighestTemp != Double.MinValue) {
      out.collect((acc.secondHighestTemp, 2))
    }
  }
}