Flink Table Api经典案例

需求说明:略

样例类:

case class Fault(id: Long, vin: String, alarmTime: Long, strategyKey: String, strategyName: String, strategyDesc: String, strategyLevel: String, actionQuality: String, remarks: String, dt: String)

测试数据:略

主代码:

package com.idea.fault.code

import com.alibaba.fastjson.{JSON, JSONObject}
import com.idea.fault.bean.Fault
import org.apache.flink.api.common.eventtime.{SerializableTimestampAssigner, WatermarkStrategy}
import org.apache.flink.api.common.functions.{FilterFunction, MapFunction}
import org.apache.flink.connector.jdbc.{JdbcConnectionOptions, JdbcExecutionOptions, JdbcSink, JdbcStatementBuilder}
import org.apache.flink.streaming.api.functions.sink.SinkFunction
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.api.scala.function.ProcessWindowFunction
import org.apache.flink.streaming.api.windowing.assigners.EventTimeSessionWindows
import org.apache.flink.streaming.api.windowing.time.Time
import org.apache.flink.streaming.api.windowing.windows.TimeWindow
import org.apache.flink.table.api.Expressions.{$, lit}
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment
import org.apache.flink.table.api.{EnvironmentSettings, Over, Table}
import org.apache.flink.types.Row
import org.apache.flink.util.Collector

import java.lang
import java.sql.PreparedStatement
import java.text.SimpleDateFormat
import java.time.Duration

/**
 * @description TODO
 * @date 2022-06-10 9:08
 * @author wanglin
 * @version
 */
object DaisyChainTable {
  def main(args: Array[String]): Unit = {
    // TODO 1.创建执行环境(流环境和表环境)
    val env: StreamExecutionEnvironment = StreamExecutionEnvironment.getExecutionEnvironment
    env.setParallelism(1)
    val settings: EnvironmentSettings = EnvironmentSettings.newInstance().useBlinkPlanner().inStreamingMode().build()
    val tableEnv: StreamTableEnvironment = StreamTableEnvironment.create(env, settings)

    // TODO 2.读取数据构建数据源
    val source: DataStream[String] = env.socketTextStream("hadoop102", 9999)

    // TODO 3.数据转换:将JSONString数据转换为样例类
    // 3.1 增加一个字段id,因为数据库多了一个自增字段id,默认为0
    // 3.2 增加一个字段actionQuality,默认为空字符串"",方便写表操作
    // 3.3 数据转换:将JSONString数据转换为样例类
    // 3.4 时间转换,将alarmTime转换为 日期
    // 3.5打印测试
    val mapDs: DataStream[Fault] = source.map(xx => {
      val mapObj: JSONObject = JSON.parseObject(xx)
      val id: Long = 0
      val vin: String = mapObj.getString("vin")
      val alarmTime: lang.Long = mapObj.getLong("alarmTime")
      val strategyKey: String = mapObj.getString("strategyKey")
      val strategyName: String = mapObj.getString("alarmStrategyName")
      val strategyDesc: String = mapObj.getString("alarmStrategyDesc")
      val strategyLevel: String = mapObj.getString("alarmLevel")
      val actionQuality: String = ""
      val remarks: String = ""
      // 时间格式化为日期
      val sdf: SimpleDateFormat = new SimpleDateFormat("yyyy-MM-dd")
      val dt: String = sdf.format(alarmTime)
      Fault(id, vin, alarmTime, strategyKey, strategyName, strategyDesc, strategyLevel, actionQuality, remarks, dt)
    })
    // 打印测试
    // mapDs.print("mapDs:")

    // TODO 4.根据故障描述过滤出菊花链不更新故障数据(可以在代码 Stream API中过滤,也可以在 Table API或者 SQL中过滤)
    //val filterDS: DataStream[JSONObject] = inDS.filter(JSONObject => JSONObject.getString("alarmStrategyDesc") == "菊花链路不更新")
    val filterDs: DataStream[Fault] = mapDs.filter(new FilterFunction[Fault] {
      override def filter(t: Fault): Boolean = ("菊花链路不更新").equals(t.strategyDesc)
    })

    //filterDs.print("filterDs:"+filterDs )
    // TODO 5.提取事件时间并生成 watermark(主要处理迟到数据)
    val faultWm: WatermarkStrategy[Fault] = WatermarkStrategy.forBoundedOutOfOrderness[Fault](Duration.ofSeconds(2))
      .withTimestampAssigner(new SerializableTimestampAssigner[Fault] {
        override def extractTimestamp(t: Fault, l: Long): Long = t.alarmTime
      })
    val wmDs: DataStream[Fault] = filterDs.assignTimestampsAndWatermarks(faultWm)
    // 打印测试
    // wmDs.print("wmDs:")

    // TODO 6.将数据流转换为Table
    val wmTable: Table = tableEnv.fromDataStream(wmDs, $("id"), $("vin"), $("alarmTime"), $("strategyKey"), $("strategyName"), $("strategyDesc"), $("strategyLevel"), $("actionQuality"), $("remarks"), $("dt"))
    // TODO 表转换为流并打印输出
    // val rowDs: DataStream[Row] = tableEnv.toAppendStream[Row](wmTable)
    // TODO 表环境创建临时表
    // tableEnv.createTemporaryView("wmTable",wmTable)
    // TODO 用执行SQL的方式提取数据
    // val faultTable: Table = tableEnv.sqlQuery("SELECT id, vin, alarmTime, strategyKey, strategyName, strategyDesc, strategyLevel, actionQuality, remarks, dt " + table)
    // TODO 将表转换成流并打印输出
    // TODO MySQLSinkUtil工具类
    val sql: String = "insert into auto_diagnose_result_220517 (id, vin, event_time,strategy_key,strategy_name,strategy_desc,strategy_level,action_quality,remarks,dt) values (?,?,?,?,?,?,?,?,?,?)"

    // TODO 定义测输出流接收超时数据和不在窗口内的数据
    val outputTag: OutputTag[Fault] = new OutputTag[Fault]("side-output")
    // TODO 分组、开窗并提取当前时间3分钟前的字段信息
    val owtable: Table = wmTable.window(Over.partitionBy($("vin")).orderBy($("alarmTime")).preceding(lit(3).minutes()).as("ow"))
      .select($("id"), $("vin"), $("alarmTime"), $("strategyKey"), $("strategyName"), $("strategyDesc"), $("strategyLevel"), $("actionQuality"), $("remarks"), $("dt"))

    // TODO 将表转换成流并开窗:会话窗口,如果5分钟没有数据接入,关闭窗口,如果有新数据接入,重新开窗
    //tableEnv.toRetractStream[(Long, String, Long, String, String, String,  String,  String,  String,  String)]
    //val owDS: DataStream[Row] = tableEnv.toAppendStream[Row](owtable)
    //owDS.print("owDS:"+owDS)
    // TODO 写入MySQL结果库
    // TODO 将 owtable 转换为样例类 Fault,并写入MySQL,此种情况报:"电机故障无法行驶"(注:窗口数据是批数据,所以有重复数据)
    /*val owDs: DataStream[(Boolean, Fault)] = tableEnv.toRetractStream[Fault](owtable)
    owDs.print("owDs:")*/
    val owDs: DataStream[Fault] = tableEnv.toAppendStream[Fault](owtable)
    owDs.print("owDs:")
    // TODO 将owDs转换为自定义样例类,写入mysql
    owDs.map(new MapFunction[Fault,Fault] {
      override def map(value: Fault): Fault = {
        val id: Long = value.id
        val vin: String = value.vin
        val event_time: Long = value.alarmTime
        val strategy_key: String = value.strategyKey
        val strategy_name: String = "Helios_内部菊花链不更新Level_4_M01"
        val strategy_desc: String = value.strategyDesc
        val strategy_level: String = value.strategyLevel
        val action_quality: String = value.actionQuality
        val remarks: String = value.remarks
        val dt: String = value.dt

        Fault(id, vin, event_time, strategy_key, strategy_name, strategy_desc, strategy_level, action_quality, remarks, dt)

      }
    })
    // TODO 将owDs开窗:会话窗口,如果2小时没有数据接入,关闭窗口,如果有新数据接入,重新开窗
    val sessionDs: DataStream[Fault] = owDs.keyBy(_.vin)
      .window(EventTimeSessionWindows.withGap(Time.hours(2)))
      .process(new ProcessWindowFunction[Fault, Fault, String, TimeWindow] {
        override def process(key: String, context: Context, elements: Iterable[Fault], out: Collector[Fault]): Unit = {
          // 取出数据并写出去
          val faultIterator: Fault = elements.iterator.next()
          out.collect(faultIterator)
        }
      })
    // TODO 其他:其他数据包含两种数据,一种是迟到数据,一种是会话窗口数据
    // TODO 获取迟到数据和其他数据,并写入MySQL
    //val sideOutputDs: DataStream[Fault] = sessionDs.getSideOutput(new OutputTag[Fault]("side-output"))
    val sideOutputDs: DataStream[Fault] = sessionDs.getSideOutput(outputTag)
   /* sideOutputDs.addSink(JdbcSink.sink("insert into auto_diagnose_result_220517 (id, vin, event_time,strategy_key,strategy_name,strategy_desc,strategy_level,action_quality,remarks,dt) values (?, ?,?,?,?,?,?,?,?,?)",
      (ps,t)
    ))*/

    // TODO 构建JdbcSink
    val jdbcSink: SinkFunction[Fault] = JdbcSink.sink(sql, new JdbcStatementBuilder[Fault] {
      override def accept(t: PreparedStatement, u: Fault): Unit = {
        t.setLong(1, u.id)
        t.setString(2, u.vin)
        t.setLong(3, u.alarmTime)
        t.setString(4, u.strategyKey)
        t.setString(5, u.strategyName)
        t.setString(6, u.strategyDesc)
        t.setString(7, u.strategyLevel)
        t.setString(8, u.actionQuality)
        t.setString(9, u.remarks)
        t.setString(10, u.dt)
      }
    }, JdbcExecutionOptions
      .builder.withBatchSize(1000)
      .withBatchIntervalMs(200)
      .withMaxRetries(5)
      .build,
      new JdbcConnectionOptions.JdbcConnectionOptionsBuilder()
        .withUrl("jdbc:mysql://saos-guardian.rdsmy5yi5ig1v3e.rds.bj.baidubce.com:3306/pbd_auto_diagnose?autoReconnect=true")
        .withDriverName("com.mysql.cj.jdbc.Driver")
        .withUsername("pbd_diagnose_rw")
        .withPassword("mT2qEV6ijlcWogcj")
        .build)

    // TODO 封装
    // TODO 消息推送
    // TODO 启动任务
    env.execute("DaisyChainSQL")

  }


}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

cts618

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值