Flink transform operator: merging multiple streams with Union

  • Unions three streams on Flink 1.11
  • Merges two Kafka streams with a MySQL CDC stream
  • Useful for real-time dimension-table modeling
  • Complete code
package com.transform

import java.util.Properties

import org.apache.flink.api.common.serialization.SimpleStringSchema
import org.apache.flink.streaming.api.scala._
import org.apache.flink.streaming.connectors.kafka.FlinkKafkaConsumer
import org.apache.flink.table.api._
import org.apache.flink.table.api.bridge.scala.StreamTableEnvironment
import org.apache.flink.types.Row

object Union {
  // Case class for user records: the single element type all three streams are mapped to
  case class User(id: String, name: String, sex: String, age: Int, ts: Long)
  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    val bsSettings = EnvironmentSettings.newInstance.useBlinkPlanner.inStreamingMode.build
    val sTableEnv = StreamTableEnvironment.create(env, bsSettings)
    // Kafka consumer configuration
    val properties = new Properties()
    properties.setProperty("bootstrap.servers", "jeff200:9092")
    val test1Consumer = new FlinkKafkaConsumer[String](
      "test1",
      new SimpleStringSchema(),
      properties
    )
    val test2Consumer = new FlinkKafkaConsumer[String](
      "test2",
      new SimpleStringSchema(),
      properties
    )
    // Replay both topics from the earliest offset so existing sample data is read
    test1Consumer.setStartFromEarliest()
    test2Consumer.setStartFromEarliest()
    val test1DataStream = env.addSource(test1Consumer)
    // Union the two Kafka streams, then parse each CSV line (id,name,sex,age,ts) into a User
    val testDataStream = env.addSource(test2Consumer)
      .union(test1DataStream)
      .map { data =>
        val arr = data.split(",")
        User(arr(0), arr(1), arr(2), arr(3).toInt, arr(4).toLong)
      }

    // MySQL CDC source registered via Flink SQL DDL
    val userDDL =
      s"""
         |CREATE TABLE t_user (
         | uid int,
         | name string
         |) WITH (
         | 'connector' = 'mysql-cdc',
         | 'hostname' = 'jeff200',
         | 'port' = '3306',
         | 'username' = 'root',
         | 'password' = 'root',
         | 'database-name' = 'test_db',
         | 'table-name' = 't_user'
         |)
         |""".stripMargin
    sTableEnv.executeSql(userDDL)
    val filterSql =
      s"""
         |SELECT uid, name
         |FROM t_user
         |WHERE uid > 0
       """.stripMargin
    val table: Table = sTableEnv.sqlQuery(filterSql)
    val tableDataStream: DataStream[(Boolean, Row)] = sTableEnv.toRetractStream[Row](table)
    val userDataStream = tableDataStream
      // Keep only insert messages (flag = true); drop retract messages
      .filter(_._1)
      .map(data => {
        User(
          id = data._2.getField(0).toString,
          name = data._2.getField(1).toString,
          sex = "数据库", // marker value: this record came from the database
          age = 100,
          ts = 0
        )
      })
    // Union the Kafka-derived stream with the CDC-derived stream
    val unionDataStream = testDataStream
      .union(userDataStream)
    unionDataStream.print()
    env.execute("Union Job")
  }
}
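
A note on the design: union in Flink requires all input streams to carry the same element type, which is why the two Kafka streams (String) and the CDC retract stream ((Boolean, Row)) are each mapped to User before merging. The following minimal sketch illustrates the constraint with hypothetical in-memory elements instead of external systems:

package com.transform

import org.apache.flink.streaming.api.scala._

object UnionSketch {
  case class User(id: String, name: String, sex: String, age: Int, ts: Long)

  def main(args: Array[String]): Unit = {
    val env = StreamExecutionEnvironment.getExecutionEnvironment
    // Both streams have the same element type (User), so union works directly
    val s1 = env.fromElements(User("1", "张三", "男", 20, 1605970941L))
    val s2 = env.fromElements(User("4", "李四", "男", 24, 1605970904L))
    s1.union(s2).print()
    env.execute("Union Sketch")
  }
}

For streams of different element types, connect with a CoMapFunction is the usual alternative.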
  • Sample data (fields in the same id,name,sex,age,ts order the job parses)
# test1
1,张三,男,20,1605970941
2,莉莉,女,30,1605970922
3,红红,女,30,1605970913
# test2
4,李四,男,24,1605970904
5,王五,男,25,1605970965
6,小明,男,20,1605970946
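
To publish these sample rows to the two topics, a small helper built on the kafka-clients dependency listed below can be used. SampleProducer is a hypothetical convenience object (not part of the original job); it assumes the same broker jeff200:9092 and topic names:

package com.transform

import java.util.Properties
import org.apache.kafka.clients.producer.{KafkaProducer, ProducerRecord}

object SampleProducer {
  def main(args: Array[String]): Unit = {
    val props = new Properties()
    props.setProperty("bootstrap.servers", "jeff200:9092")
    props.setProperty("key.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    props.setProperty("value.serializer", "org.apache.kafka.common.serialization.StringSerializer")
    val producer = new KafkaProducer[String, String](props)
    // Each line uses the id,name,sex,age,ts field order the Union job expects
    Seq(
      "test1" -> Seq("1,张三,男,20,1605970941", "2,莉莉,女,30,1605970922", "3,红红,女,30,1605970913"),
      "test2" -> Seq("4,李四,男,24,1605970904", "5,王五,男,25,1605970965", "6,小明,男,20,1605970946")
    ).foreach { case (topic, lines) =>
      lines.foreach(line => producer.send(new ProducerRecord[String, String](topic, line)))
    }
    producer.close()
  }
}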
  • Main dependencies
        <!-- flink table api -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-api-scala-bridge_2.11</artifactId>
            <version>${flink.version}</version>
            <scope>${scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-table-planner-blink_2.11</artifactId>
            <version>${flink.version}</version>
            <scope>${scope}</scope>
        </dependency>

        <!-- Flink-CDC -->
        <dependency>
            <groupId>com.alibaba.ververica</groupId>
            <artifactId>flink-connector-mysql-cdc</artifactId>
            <version>1.1.0</version>
        </dependency>

        <!-- kafka api -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-connector-kafka_2.11</artifactId>
            <version>${flink.version}</version>
            <scope>${scope}</scope>
        </dependency>
        <dependency>
            <groupId>org.apache.kafka</groupId>
            <artifactId>kafka-clients</artifactId>
            <version>${kafka.version}</version>
        </dependency>
        <!-- flink json -->
        <dependency>
            <groupId>org.apache.flink</groupId>
            <artifactId>flink-json</artifactId>
            <version>${flink.version}</version>
            <scope>${scope}</scope>
        </dependency>
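
The snippets above reference flink.version, kafka.version, and scope as Maven properties without defining them. Illustrative values consistent with flink-connector-mysql-cdc 1.1.0 (which targets Flink 1.11.x) might be:

        <properties>
            <flink.version>1.11.2</flink.version>
            <kafka.version>2.4.1</kafka.version>
            <scope>compile</scope>
        </properties>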
  • Run result
    (screenshot of the printed output omitted; the job prints the merged User records to stdout)