需求说明:略
样例类:
case class Fault(id: Long, vin: String, alarmTime: Long, strategyKey: String, strategyName: String, strategyDesc: String, strategyLevel: String, actionQuality: String, remarks: String, dt: String)
测试数据:略
主代码:
package com.idea.fault.code
import com.alibaba.fastjson.{JSON, JSONObject}
import com.idea.fault.bean.Fault
import org.apache.flink.api.common.eventtime.{SerializableTimestampAssigner, WatermarkStrategy}
import org.apache.flink.api.common.functions.{FilterFunction, MapFunction}
import org.apache.flink.connector.jdbc.{JdbcConnectionOptions, JdbcExecutionOptions, JdbcSink, JdbcStatementBuilder}
import org.apache.flink.streaming.api.functions.sink.SinkFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.ProcessWindowFunction
import org.apache.flink.streaming.api.windowing.assigners.EventTimeSessionWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.table.api.Expressions.{$, lit}
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment
import org.apache.flink.table.api.{EnvironmentSettings, Over, Table}
import org.apache.flink.types.Row
import org.apache.flink.util.Collector
import java.lang
import java.sql.PreparedStatement
import java.text.SimpleDateFormat
import java.time.Duration
/**
* @description TODO
* @date 2022-06-10 9:08
* @author wanglin
* @version
*/
object DaisyChainTable {
def main(args: Array[String]): Unit = {
// TODO 1.创建执行环境(流环境和表环境)
val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
env.setParallelism(1)
val settings: EnvironmentSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build()
val tableEnv: StreamTableEnvironment = StreamTableEnvironment.create(env, settings)
// TODO 2.读取数据构建数据源
val source: DataStream[String] = env.socketTextStream("hadoop102", 9999)
// TODO 3.数据转换:将JSONString数据转换为样例类
// 3.1 增加一个字段id,因为数据库多了一个自增字段id,默认为0
// 3.2 增加一个字段actionQuality,默认为空字符串"",方便写表操作
// 3.3 数据转换:将JSONString数据转换为样例类
// 3.4 时间转换,将alarmTime转换为 日期
// 3.5打印测试
val mapDs: DataStream[Fault] = source.map(xx => {
val mapObj: JSONObject = JSON.parseObject(xx)
val id: Long = 0
val vin: String = mapObj.getString("vin")
val alarmTime: lang.Long = mapObj.getLong("alarmTime")
val strategyKey: String = mapObj.getString("strategyKey")
val strategyName: String = mapObj.getString("alarmStrategyName")
val strategyDesc: String = mapObj.getString("alarmStrategyDesc")
val strategyLevel: String = mapObj.getString("alarmLevel")
val actionQuality: String = ""
val remarks: String = ""
// 时间格式化为日期
val sdf: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd")
val dt: String = sdf.format(alarmTime)
Fault(id, vin, alarmTime, strategyKey, strategyName, strategyDesc, strategyLevel, actionQuality, remarks, dt)
})
// 打印测试
// mapDs.print("mapDs:")
// TODO 4.根据故障描述过滤出菊花链不更新故障数据(可以在代码 Stream API中过滤,也可以在 Table API或者 SQL中过滤)
//val filterDS: DataStream[JSONObject] = inDS.filter(JSONObject => JSONObject.getString("alarmStrategyDesc") == "菊花链路不更新")
val filterDs: DataStream[Fault] = mapDs.filter(new FilterFunction[Fault] {
override def filter(t: Fault): Boolean = ("菊花链路不更新").equals(t.strategyDesc)
})
//filterDs.print("filterDs:"+filterDs )
// TODO 5.提取事件时间并生成 watermark(主要处理迟到数据)
val faultWm: WatermarkStrategy[Fault] = WatermarkStrategy.forBoundedOutOfOrderness[Fault](Duration.ofSeconds(2))
.withTimestampAssigner(new SerializableTimestampAssigner[Fault] {
override def extractTimestamp(t: Fault, l: Long): Long = t.alarmTime
})
val wmDs: DataStream[Fault] = filterDs.assignTimestampsAndWatermarks(faultWm)
// 打印测试
// wmDs.print("wmDs:")
// TODO 6.将数据流转换为Table
val wmTable: Table = tableEnv.fromDataStream(wmDs, $("id"), $("vin"), $("alarmTime"), $("strategyKey"), $("strategyName"), $("strategyDesc"), $("strategyLevel"), $("actionQuality"), $("remarks"), $("dt"))
// TODO 表转换为流并打印输出
// val rowDs: DataStream[Row] = tableEnv.toAppendStream[Row](wmTable)
// TODO 表环境创建临时表
// tableEnv.createTemporaryView("wmTable",wmTable)
// TODO 用执行SQL的方式提取数据
// val faultTable: Table = tableEnv.sqlQuery("SELECT id, vin, alarmTime, strategyKey, strategyName, strategyDesc, strategyLevel, actionQuality, remarks, dt " + table)
// TODO 将表转换成流并打印输出
// TODO MySQLSinkUtil工具类
val sql: String = "insert into auto_diagnose_result_220517 (id, vin, event_time,strategy_key,strategy_name,strategy_desc,strategy_level,action_quality,remarks,dt) values (?,?,?,?,?,?,?,?,?,?)"
// TODO 定义测输出流接收超时数据和不在窗口内的数据
val outputTag: OutputTag[Fault] = new OutputTag[Fault]("side-output")
// TODO 分组、开窗并提取当前时间3分钟前的字段信息
val owtable: Table = wmTable.window(Over.partitionBy($("vin")).orderBy($("alarmTime")).preceding(lit(3).minutes()).as("ow"))
.select($("id"), $("vin"), $("alarmTime"), $("strategyKey"), $("strategyName"), $("strategyDesc"), $("strategyLevel"), $("actionQuality"), $("remarks"), $("dt"))
// TODO 将表转换成流并开窗:会话窗口,如果5分钟没有数据接入,关闭窗口,如果有新数据接入,重新开窗
//tableEnv.toRetractStream[(Long, String, Long, String, String, String, String, String, String, String)]
//val owDS: DataStream[Row] = tableEnv.toAppendStream[Row](owtable)
//owDS.print("owDS:"+owDS)
// TODO 写入MySQL结果库
// TODO 将 owtable 转换为样例类 Fault,并写入MySQL,此种情况报:"电机故障无法行驶"(注:窗口数据是批数据,所以有重复数据)
/*val owDs: DataStream[(Boolean, Fault)] = tableEnv.toRetractStream[Fault](owtable)
owDs.print("owDs:")*/
val owDs: DataStream[Fault] = tableEnv.toAppendStream[Fault](owtable)
owDs.print("owDs:")
// TODO 将owDs转换为自定义样例类,写入mysql
owDs.map(new MapFunction[Fault,Fault] {
override def map(value: Fault): Fault = {
val id: Long = value.id
val vin: String = value.vin
val event_time: Long = value.alarmTime
val strategy_key: String = value.strategyKey
val strategy_name: String = "Helios_内部菊花链不更新Level_4_M01"
val strategy_desc: String = value.strategyDesc
val strategy_level: String = value.strategyLevel
val action_quality: String = value.actionQuality
val remarks: String = value.remarks
val dt: String = value.dt
Fault(id, vin, event_time, strategy_key, strategy_name, strategy_desc, strategy_level, action_quality, remarks, dt)
}
})
// TODO 将owDs开窗:会话窗口,如果2小时没有数据接入,关闭窗口,如果有新数据接入,重新开窗
val sessionDs: DataStream[Fault] = owDs.keyBy(_.vin)
.window(EventTimeSessionWindows.withGap(Time.hours(2)))
.process(new ProcessWindowFunction[Fault, Fault, String, TimeWindow] {
override def process(key: String, context: Context, elements: Iterable[Fault], out: Collector[Fault]): Unit = {
// 取出数据并写出去
val faultIterator: Fault = elements.iterator.next()
out.collect(faultIterator)
}
})
// TODO 其他:其他数据包含两种数据,一种是迟到数据,一种是会话窗口数据
// TODO 获取迟到数据和其他数据,并写入MySQL
//val sideOutputDs: DataStream[Fault] = sessionDs.getSideOutput(new OutputTag[Fault]("side-output"))
val sideOutputDs: DataStream[Fault] = sessionDs.getSideOutput(outputTag)
/* sideOutputDs.addSink(JdbcSink.sink("insert into auto_diagnose_result_220517 (id, vin, event_time,strategy_key,strategy_name,strategy_desc,strategy_level,action_quality,remarks,dt) values (?, ?,?,?,?,?,?,?,?,?)",
(ps,t)
))*/
// TODO 构建JdbcSink
val jdbcSink: SinkFunction[Fault] = JdbcSink.sink(sql, new JdbcStatementBuilder[Fault] {
override def accept(t: PreparedStatement, u: Fault): Unit = {
t.setLong(1, u.id)
t.setString(2, u.vin)
t.setLong(3, u.alarmTime)
t.setString(4, u.strategyKey)
t.setString(5, u.strategyName)
t.setString(6, u.strategyDesc)
t.setString(7, u.strategyLevel)
t.setString(8, u.actionQuality)
t.setString(9, u.remarks)
t.setString(10, u.dt)
}
}, JdbcExecutionOptions
.builder.withBatchSize(1000)
.withBatchIntervalMs(200)
.withMaxRetries(5)
.build,
new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
.withUrl("jdbc:mysql://saos-guardian.rdsmy5yi5ig1v3e.rds.bj.baidubce.com:3306/pbd_auto_diagnose?autoReconnect=true")
.withDriverName("com.mysql.cj.jdbc.Driver")
.withUsername("pbd_diagnose_rw")
.withPassword("mT2qEV6ijlcWogcj")
.build)
// TODO 封装
// TODO 消息推送
// TODO 启动任务
env.execute("DaisyChainSQL")
}
}