基于Flink+Hive+Kafka的流批一体数仓实践--04Kafka_Sink_Kafka_DWD_OrderFull

Flink采用HiveCatalog作为表元数据持久化的介质。对于同时部署了Hive和Flink的公司来说,可以方便地统一管理元数据;而对于只部署了Flink的公司来说,HiveCatalog也是Flink唯一支持的元数据持久化介质。如果不将元数据持久化,开发过程中每次都需要使用DDL重新将Kafka等数据源注册到临时的Catalog中,浪费很多精力和时间。

利用ODS层的订单主表(orderMaster)和订单明细(orderDetail)数据,按orderID关联后写入订单宽表(orderFull)。

DWD层直接上代码

import java.time.Duration
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.environment.{ExecutionCheckpointingOptions, StreamExecutionEnvironment}
import org.apache.flink.table.api.{EnvironmentSettings, SqlDialect}
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment
import org.apache.flink.table.catalog.hive.HiveCatalog

//case class orderMaster(area: String, orderID: String, brandId: String, customerID: String, orderStatus: String, orderChannel: String, storeCode: String, ts: String)

/**
 * Streaming job that joins the `orderMaster` and `orderDetail` Kafka-backed tables on
 * `orderID` and writes the combined rows into the `orderFull` Kafka-backed table.
 *
 * All three tables are declared through a HiveCatalog named `my_catalog` whose default
 * database is `goodme`. The DDL uses `CREATE TABLE IF NOT EXISTS`, so it does not redefine
 * a table that is already present in the catalog.
 */
object Kafka_Sink_Kafka_DWD_OrderFull {

  /** Hive configuration directory used when no override is passed on the command line. */
  private val DefaultHiveConfDir = "D:\\IdeaProjects\\Flink_Hive\\src\\main\\resources"

  /**
   * Builds the table environment, registers the catalog and the three tables, submits the
   * `INSERT INTO goodme.orderFull` join and finally prints the rows read back from
   * `goodme.orderFull`.
   *
   * @param args optional; when `args(0)` is present and non-blank it is used as the
   *             directory containing `hive-site.xml`, otherwise [[DefaultHiveConfDir]]
   *             is used (the previously hard-coded location).
   */
  def main(args: Array[String]): Unit = {
    // Allow the Hive conf directory to be overridden so the job is not tied to one machine.
    val hiveConfDir = args.headOption.filter(_.trim.nonEmpty).getOrElse(DefaultHiveConfDir)

    val streamEnv = StreamExecutionEnvironment.getExecutionEnvironment
    streamEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    streamEnv.setParallelism(3)

    val tableEnvSettings = EnvironmentSettings.newInstance()
      .useBlinkPlanner()
      .inStreamingMode()
      .build()
    val tableEnv = StreamTableEnvironment.create(streamEnv, tableEnvSettings)
    // Exactly-once checkpoints every 20 seconds, configured through the table config.
    tableEnv.getConfig.getConfiguration.set(ExecutionCheckpointingOptions.CHECKPOINTING_MODE, CheckpointingMode.EXACTLY_ONCE)
    tableEnv.getConfig.getConfiguration.set(ExecutionCheckpointingOptions.CHECKPOINTING_INTERVAL, Duration.ofSeconds(20))
    val catalogName = "my_catalog"
    // Register the HiveCatalog and make it the current catalog.
    val catalog = new HiveCatalog(
      catalogName, // catalog name
      "goodme", // default database
      hiveConfDir, // Hive config (hive-site.xml) directory
      "2.3.6" // Hive version
    )
    tableEnv.registerCatalog(catalogName, catalog)
    tableEnv.useCatalog(catalogName)
    // orderMaster: source table over the GoodmeODS_orderMaster topic.
    // `odsts` is divided by 1000 before FROM_UNIXTIME, i.e. it is treated as epoch millis;
    // t1/t2/t3 are the date, hour and minute strings derived from the same timestamp.
    //    tableEnv.executeSql("DROP TABLE IF EXISTS goodme.orderMaster")
    tableEnv.executeSql(
      """
        |CREATE TABLE IF NOT EXISTS goodme.orderMaster (
        |  area STRING,
        |  orderID BIGINT,
        |  brandId STRING,
        |  customerID STRING,
        |  orderStatus STRING,
        |  orderChannel STRING,
        |  storeCode BIGINT,
        |  odsts BIGINT,
        |  procTime AS PROCTIME(),
        |  eventTime AS TO_TIMESTAMP(FROM_UNIXTIME(odsts / 1000,'yyyy-MM-dd HH:mm:ss')),
        |  t1 as DATE_FORMAT(TO_TIMESTAMP(FROM_UNIXTIME(odsts / 1000,'yyyy-MM-dd HH:mm:ss')),'yyyy-MM-dd'),
        |  t2 as DATE_FORMAT(TO_TIMESTAMP(FROM_UNIXTIME(odsts / 1000,'yyyy-MM-dd HH:mm:ss')),'HH'),
        |  t3 as DATE_FORMAT(TO_TIMESTAMP(FROM_UNIXTIME(odsts / 1000,'yyyy-MM-dd HH:mm:ss')),'mm'),
        |  WATERMARK FOR eventTime AS eventTime - INTERVAL '15' SECOND
        |) WITH (
        |  'connector'='kafka',
        |  'topic'='GoodmeODS_orderMaster',
        |  'properties.bootstrap.servers'='hadoop002:9092,hadoop003:9092,hadoop004:9092',
        |  'properties.group.id'='consumer-group',
        |  'scan.startup.mode' = 'latest-offset',
        |  'format'='json',
        |  'json.fail-on-missing-field'='false',
        |  'json.ignore-parse-errors'='true'
        |)
      """.stripMargin
    )
    // orderDetail: source table over the GoodmeODS_orderDetail topic.
    //    tableEnv.executeSql("DROP TABLE IF EXISTS goodme.orderDetail")
    tableEnv.executeSql(
      """
        |CREATE TABLE IF NOT EXISTS goodme.orderDetail (
        |  foodName STRING,
        |  payAmount BIGINT,
        |  orderID BIGINT,
        |  foodAmount STRING,
        |  odsts BIGINT,
        |  eventTime AS TO_TIMESTAMP(FROM_UNIXTIME(odsts / 1000,'yyyy-MM-dd HH:mm:ss')),
        |  procTime AS PROCTIME(),
        |  WATERMARK FOR eventTime AS eventTime - INTERVAL '15' SECOND
        |) WITH (
        |  'connector'='kafka',
        |  'topic'='GoodmeODS_orderDetail',
        |  'properties.bootstrap.servers'='hadoop002:9092,hadoop003:9092,hadoop004:9092',
        |  'properties.group.id'='consumer-group',
        |  'scan.startup.mode' = 'latest-offset',
        |  'format'='json',
        |  'json.fail-on-missing-field'='false',
        |  'json.ignore-parse-errors'='true'
        |)
      """.stripMargin
    )
    // orderFull: table the join result is written to (and read back from below).
    // Its twelve physical columns line up positionally with the SELECT list of the INSERT.
    // NOTE(review): the topic keeps the `GoodmeODS_` prefix although this is the DWD
    // output — confirm the topic name is intentional.
    //    tableEnv.executeSql("DROP TABLE IF EXISTS goodme.orderFull")
    tableEnv.executeSql(
      """
        |CREATE TABLE IF NOT EXISTS goodme.orderFull (
        |  area STRING,
        |  orderID BIGINT,
        |  brandId STRING,
        |  customerID STRING,
        |  orderStatus STRING,
        |  orderChannel STRING,
        |  storeCode BIGINT,
        |  foodName STRING,
        |  payAmount BIGINT,
        |  orderID2 BIGINT,
        |  foodAmount STRING,
        |  odsts BIGINT,
        |  eventTime AS TO_TIMESTAMP(FROM_UNIXTIME(odsts / 1000,'yyyy-MM-dd HH:mm:ss')),
        |  procTime AS PROCTIME(),
        |  WATERMARK FOR eventTime AS eventTime - INTERVAL '15' SECOND
        |) WITH (
        |  'connector'='kafka',
        |  'topic'='GoodmeODS_orderFull',
        |  'properties.bootstrap.servers'='hadoop002:9092,hadoop003:9092,hadoop004:9092',
        |  'properties.group.id'='consumer-group',
        |  'scan.startup.mode' = 'latest-offset',
        |  'format'='json',
        |  'json.fail-on-missing-field'='false',
        |  'json.ignore-parse-errors'='true'
        |)
      """.stripMargin
    )
    // Join master and detail rows on orderID and write them to orderFull.
    // NOTE(review): this is a regular (non-windowed) join with no time bound and no state
    // retention configured here, so join state is presumably kept indefinitely — consider
    // an interval join on eventTime or an idle-state retention setting; confirm against
    // the expected delay between master and detail records.
    tableEnv.executeSql(
      """
        |INSERT INTO goodme.orderFull
        |SELECT
        |orderMaster.area,
        |orderMaster.orderID,
        |orderMaster.brandId,
        |orderMaster.customerID,
        |orderMaster.orderStatus,
        |orderMaster.orderChannel,
        |orderMaster.storeCode,
        |orderDetail.foodName,
        |orderDetail.payAmount,
        |orderDetail.orderID,
        |orderDetail.foodAmount,
        |orderDetail.odsts
        |FROM orderMaster INNER JOIN orderDetail
        |ON orderMaster.orderID = orderDetail.orderID
      """.stripMargin
    )
    // Read the joined rows back from orderFull and print them to stdout.
    tableEnv.executeSql("select * from goodme.orderFull").print()
  }
}
  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值