Flink uses HiveCatalog as the medium for persisting table metadata. For companies that deploy both Hive and Flink, this makes metadata management straightforward; for companies that deploy only Flink, HiveCatalog is currently the only persistent catalog Flink supports out of the box. Without persisted metadata, every development session has to re-register Kafka and other sources into a temporary in-memory catalog with DDL, which wastes a lot of time and effort.
This job joins the ODS-layer order master and order detail streams into a single wide table. Here is the DWD-layer code.
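The code assumes the Hive and Kafka connectors are on the classpath. A minimal sbt sketch follows; the Flink version and exact artifact list are assumptions, so match them to your own cluster:

// build.sbt (illustrative; adjust versions to your Flink/Hive/Scala setup)
val flinkVersion = "1.12.7"
libraryDependencies ++= Seq(
  "org.apache.flink" %% "flink-streaming-scala"        % flinkVersion,
  "org.apache.flink" %% "flink-table-api-java-bridge"  % flinkVersion,
  "org.apache.flink" %% "flink-table-planner-blink"    % flinkVersion,
  "org.apache.flink" %% "flink-connector-hive"         % flinkVersion,
  "org.apache.flink" %% "flink-connector-kafka"        % flinkVersion,
  "org.apache.flink"  % "flink-json"                   % flinkVersion,
  "org.apache.hive"   % "hive-exec"                    % "2.3.6"
)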
import java.time.Duration
import org.apache.flink.streaming.api.{CheckpointingMode, TimeCharacteristic}
import org.apache.flink.streaming.api.environment.{ExecutionCheckpointingOptions, StreamExecutionEnvironment}
import org.apache.flink.table.api.EnvironmentSettings
import org.apache.flink.table.api.bridge.java.StreamTableEnvironment
import org.apache.flink.table.catalog.hive.HiveCatalog
//case class orderMaster(area: String, orderID: String, brandId: String, customerID: String, orderStatus: String, orderChannel: String, storeCode: String, ts: String)
object Kafka_Sink_Kafka_DWD_OrderFull {
  def main(args: Array[String]): Unit = {
    val streamEnv = StreamExecutionEnvironment.getExecutionEnvironment
    // Event time is the default in Flink 1.12+; this call is only needed on older versions
    streamEnv.setStreamTimeCharacteristic(TimeCharacteristic.EventTime)
    streamEnv.setParallelism(3)

    val tableEnvSettings = EnvironmentSettings.newInstance()
      .useBlinkPlanner()
      .inStreamingMode()
      .build()
    val tableEnv = StreamTableEnvironment.create(streamEnv, tableEnvSettings)
    tableEnv.getConfig.getConfiguration.set(ExecutionCheckpointingOptions.CHECKPOINTING_MODE, CheckpointingMode.EXACTLY_ONCE)
    tableEnv.getConfig.getConfiguration.set(ExecutionCheckpointingOptions.CHECKPOINTING_INTERVAL, Duration.ofSeconds(20))
    val catalogName = "my_catalog"
    // Register a HiveCatalog so that the table definitions below survive across sessions
    val catalog = new HiveCatalog(
      catalogName,                                          // catalog name
      "goodme",                                             // default database
      "D:\\IdeaProjects\\Flink_Hive\\src\\main\\resources", // directory containing hive-site.xml
      "2.3.6"                                               // Hive version
    )
    tableEnv.registerCatalog(catalogName, catalog)
    tableEnv.useCatalog(catalogName)
    // orderMaster: ODS order-master stream from Kafka
    // tableEnv.executeSql("DROP TABLE IF EXISTS goodme.orderMaster")
    tableEnv.executeSql(
      """
        |CREATE TABLE IF NOT EXISTS goodme.orderMaster (
        |  area STRING,
        |  orderID BIGINT,
        |  brandId STRING,
        |  customerID STRING,
        |  orderStatus STRING,
        |  orderChannel STRING,
        |  storeCode BIGINT,
        |  odsts BIGINT,
        |  procTime AS PROCTIME(),
        |  eventTime AS TO_TIMESTAMP(FROM_UNIXTIME(odsts / 1000, 'yyyy-MM-dd HH:mm:ss')),
        |  t1 AS DATE_FORMAT(TO_TIMESTAMP(FROM_UNIXTIME(odsts / 1000, 'yyyy-MM-dd HH:mm:ss')), 'yyyy-MM-dd'),
        |  t2 AS DATE_FORMAT(TO_TIMESTAMP(FROM_UNIXTIME(odsts / 1000, 'yyyy-MM-dd HH:mm:ss')), 'HH'),
        |  t3 AS DATE_FORMAT(TO_TIMESTAMP(FROM_UNIXTIME(odsts / 1000, 'yyyy-MM-dd HH:mm:ss')), 'mm'),
        |  WATERMARK FOR eventTime AS eventTime - INTERVAL '15' SECOND
        |) WITH (
        |  'connector' = 'kafka',
        |  'topic' = 'GoodmeODS_orderMaster',
        |  'properties.bootstrap.servers' = 'hadoop002:9092,hadoop003:9092,hadoop004:9092',
        |  'properties.group.id' = 'consumer-group',
        |  'scan.startup.mode' = 'latest-offset',
        |  'format' = 'json',
        |  'json.fail-on-missing-field' = 'false',
        |  'json.ignore-parse-errors' = 'true'
        |)
      """.stripMargin
    )
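    // An illustrative payload on GoodmeODS_orderMaster (field names and types follow the
    // DDL above; the concrete values are made up for illustration):
    // {"area":"East","orderID":10001,"brandId":"B01","customerID":"C123",
    //  "orderStatus":"PAID","orderChannel":"APP","storeCode":2001,"odsts":1620000000000}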
    // orderDetail: ODS order-detail stream from Kafka
    // tableEnv.executeSql("DROP TABLE IF EXISTS goodme.orderDetail")
    tableEnv.executeSql(
      """
        |CREATE TABLE IF NOT EXISTS goodme.orderDetail (
        |  foodName STRING,
        |  payAmount BIGINT,
        |  orderID BIGINT,
        |  foodAmount STRING,
        |  odsts BIGINT,
        |  eventTime AS TO_TIMESTAMP(FROM_UNIXTIME(odsts / 1000, 'yyyy-MM-dd HH:mm:ss')),
        |  procTime AS PROCTIME(),
        |  WATERMARK FOR eventTime AS eventTime - INTERVAL '15' SECOND
        |) WITH (
        |  'connector' = 'kafka',
        |  'topic' = 'GoodmeODS_orderDetail',
        |  'properties.bootstrap.servers' = 'hadoop002:9092,hadoop003:9092,hadoop004:9092',
        |  'properties.group.id' = 'consumer-group',
        |  'scan.startup.mode' = 'latest-offset',
        |  'format' = 'json',
        |  'json.fail-on-missing-field' = 'false',
        |  'json.ignore-parse-errors' = 'true'
        |)
      """.stripMargin
    )
    // orderFull: DWD-layer wide table, written back to Kafka as the join sink
    // tableEnv.executeSql("DROP TABLE IF EXISTS goodme.orderFull")
    tableEnv.executeSql(
      """
        |CREATE TABLE IF NOT EXISTS goodme.orderFull (
        |  area STRING,
        |  orderID BIGINT,
        |  brandId STRING,
        |  customerID STRING,
        |  orderStatus STRING,
        |  orderChannel STRING,
        |  storeCode BIGINT,
        |  foodName STRING,
        |  payAmount BIGINT,
        |  orderID2 BIGINT,
        |  foodAmount STRING,
        |  odsts BIGINT,
        |  eventTime AS TO_TIMESTAMP(FROM_UNIXTIME(odsts / 1000, 'yyyy-MM-dd HH:mm:ss')),
        |  procTime AS PROCTIME(),
        |  WATERMARK FOR eventTime AS eventTime - INTERVAL '15' SECOND
        |) WITH (
        |  'connector' = 'kafka',
        |  'topic' = 'GoodmeODS_orderFull',
        |  'properties.bootstrap.servers' = 'hadoop002:9092,hadoop003:9092,hadoop004:9092',
        |  'properties.group.id' = 'consumer-group',
        |  'scan.startup.mode' = 'latest-offset',
        |  'format' = 'json',
        |  'json.fail-on-missing-field' = 'false',
        |  'json.ignore-parse-errors' = 'true'
        |)
      """.stripMargin
    )
    // Join master and detail on orderID and write the widened rows to the DWD topic
    tableEnv.executeSql(
      """
        |INSERT INTO goodme.orderFull
        |SELECT
        |  orderMaster.area,
        |  orderMaster.orderID,
        |  orderMaster.brandId,
        |  orderMaster.customerID,
        |  orderMaster.orderStatus,
        |  orderMaster.orderChannel,
        |  orderMaster.storeCode,
        |  orderDetail.foodName,
        |  orderDetail.payAmount,
        |  orderDetail.orderID,
        |  orderDetail.foodAmount,
        |  orderDetail.odsts
        |FROM orderMaster
        |INNER JOIN orderDetail ON orderMaster.orderID = orderDetail.orderID
      """.stripMargin
    )
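    // Note: this is a regular (unbounded) stream join, so Flink must retain all rows of
    // both inputs in state indefinitely. To bound state in production you could configure
    // idle-state retention, e.g. (Flink 1.11/1.12 TableConfig API; values are illustrative
    // assumptions, not part of the original job):
    //   import org.apache.flink.api.common.time.Time
    //   tableEnv.getConfig.setIdleStateRetentionTime(Time.hours(1), Time.hours(2))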
tableEnv.executeSql("select * from goodme.orderFull").print()
}
}
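The trailing SELECT ... print() is only there to verify results locally; when the job is submitted to a cluster, the INSERT INTO statement alone keeps the pipeline running, and the joined rows can be checked by consuming the GoodmeODS_orderFull topic with any Kafka consumer, for example kafka-console-consumer.sh --bootstrap-server hadoop002:9092 --topic GoodmeODS_orderFull.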