Applications of Hudi (3)

Chapter 9: The ODS Layer of the Lakehouse

The design points of the ODS layer are as follows:

1) The ODS table schemas follow the structure of the data synchronized from the business systems.

2) First create the tables with Flink SQL, then INSERT the source data (i.e., the data from Section 8.4) into them, filling in any missing fields.

9.1 Log Table Creation and Data Loading

1) Table creation statement

CREATE TABLE `hudi_ods`.`ods_log`  (

  `uuid`     STRING,

  `common`   ROW<ar STRING,ba STRING,ch STRING,is_new STRING,md STRING,mid STRING,os STRING,uid STRING,vc STRING>,

  `page`     ROW<during_time STRING,item STRING,item_type STRING,last_page_id STRING,page_id STRING,source_type STRING> ,

  `actions`  ARRAY<ROW<action_id STRING,item STRING,item_type STRING,ts BIGINT>>,

  `displays` ARRAY<ROW<display_type STRING,item STRING,item_type STRING,`order` STRING,pos_id STRING>>,

  `start`    ROW<entry STRING,loading_time BIGINT,open_ad_id BIGINT,open_ad_ms BIGINT,open_ad_skip_ms BIGINT>,

  `err`      ROW<error_code BIGINT,msg STRING>,

  `ts`       BIGINT,

  `dt`       STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_log',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'uuid',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_log'

);

2) Data loading

set table.dynamic-table-options.enabled=true;

set table.exec.sink.not-null-enforcer=drop;

INSERT INTO hudi_ods.ods_log

select uuid() as uuid

       ,*

       ,FROM_UNIXTIME(ts/1000,'yyyy-MM-dd') as dt

from `kafka_log`.`kafka_topic_log`;
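Because the table is created with 'read.streaming.enabled' = 'true' and the session enables table.dynamic-table-options.enabled, the load can be verified with a streaming query. A minimal sketch, assuming the insert job above is already committing data ('read.start-commit' is a Hudi Flink read option; 'earliest' scans from the first commit):

-- Streaming read of ods_log via a dynamic table options hint
select uuid, page.page_id, ts, dt
from hudi_ods.ods_log /*+ OPTIONS('read.start-commit'='earliest') */;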

9.2 Business Table Creation and Data Loading

9.2.1 Activity Info Table ods_activity_info

CREATE TABLE `hudi_ods`.`ods_activity_info`  (

  `id` bigint,

  `activity_name` varchar(200),

  `activity_type` varchar(10),

  `activity_desc` varchar(2000),

  `start_time` timestamp(0),

  `end_time` timestamp(0),

  `create_time` timestamp(0),

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_activity_info',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_activity_info');

9.2.2 Activity Rule Table ods_activity_rule

CREATE TABLE `hudi_ods`.`ods_activity_rule`  (

  `id` int,

  `activity_id` int,

  `activity_type` varchar(20),

  `condition_amount` decimal(16, 2),

  `condition_num` bigint,

  `benefit_amount` decimal(16, 2),

  `benefit_discount` decimal(10, 2),

  `benefit_level` bigint,

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_activity_rule',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_activity_rule');

9.2.3 Level-1 Category Table ods_base_category1

CREATE TABLE `hudi_ods`.`ods_base_category1`  (

  `id` bigint,

  `name` varchar(10),

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_base_category1',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_base_category1');

9.2.4 Level-2 Category Table ods_base_category2

CREATE TABLE `hudi_ods`.`ods_base_category2`  (

  `id` bigint,

  `name` varchar(200),

  `category1_id` bigint,

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_base_category2',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_base_category2');

9.2.5 Level-3 Category Table ods_base_category3

CREATE TABLE `hudi_ods`.`ods_base_category3`  (

  `id` bigint,

  `name` varchar(200),

  `category2_id` bigint,

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_base_category3',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_base_category3');

9.2.6 Code Dictionary Table ods_base_dic

CREATE TABLE `hudi_ods`.`ods_base_dic`  (

  `dic_code` varchar(10),

  `dic_name` varchar(100),

  `parent_code` varchar(10),

  `create_time` timestamp(0),

  `operate_time` timestamp(0),

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_base_dic',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'dic_code',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_base_dic');

9.2.7 Province Table ods_base_province

CREATE TABLE `hudi_ods`.`ods_base_province`  (

  `id` bigint,

  `name` varchar(20),

  `region_id` varchar(20),

  `area_code` varchar(20),

  `iso_code` varchar(20),

  `iso_3166_2` varchar(20),

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_base_province',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_base_province');

9.2.8 Region Table ods_base_region

CREATE TABLE `hudi_ods`.`ods_base_region`  (

  `id` varchar(20),

  `region_name` varchar(20),

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_base_region',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_base_region');

9.2.9 Trademark Table ods_base_trademark

CREATE TABLE `hudi_ods`.`ods_base_trademark`  (

  `id` bigint,

  `tm_name` varchar(100),

  `logo_url` varchar(200),

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_base_trademark',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_base_trademark');

9.2.10 Shopping Cart Table ods_cart_info

CREATE TABLE `hudi_ods`.`ods_cart_info`  (

  `id` bigint,

  `user_id` varchar(200),

  `sku_id` bigint,

  `cart_price` decimal(10, 2),

  `sku_num` int,

  `img_url` varchar(200),

  `sku_name` varchar(200),

  `is_checked` int,

  `create_time` timestamp(0),

  `operate_time` timestamp(0),

  `is_ordered` bigint,

  `order_time` timestamp(0),

  `source_type` varchar(20),

  `source_id` bigint,

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_cart_info',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_cart_info');

9.2.11 Coupon Info Table ods_coupon_info

CREATE TABLE `hudi_ods`.`ods_coupon_info`  (

  `id` bigint,

  `coupon_name` varchar(100),

  `coupon_type` varchar(10),

  `condition_amount` decimal(10, 2),

  `condition_num` bigint,

  `activity_id` bigint,

  `benefit_amount` decimal(16, 2),

  `benefit_discount` decimal(10, 2),

  `create_time` timestamp(0),

  `range_type` varchar(10),

  `limit_num` int,

  `taken_count` int,

  `start_time` timestamp(0),

  `end_time` timestamp(0),

  `operate_time` timestamp(0),

  `expire_time` timestamp(0),

  `range_desc` varchar(500),

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_coupon_info',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_coupon_info');

9.2.12 SKU Platform Attribute Table ods_sku_attr_value

CREATE TABLE `hudi_ods`.`ods_sku_attr_value`  (

  `id` bigint,

  `attr_id` bigint,

  `value_id` bigint,

  `sku_id` bigint,

  `attr_name` varchar(30),

  `value_name` varchar(30),

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_sku_attr_value',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_sku_attr_value');

9.2.13 SKU Table ods_sku_info

CREATE TABLE `hudi_ods`.`ods_sku_info`  (

  `id` bigint,

  `spu_id` bigint,

  `price` decimal(10, 0),

  `sku_name` varchar(200),

  `sku_desc` varchar(2000),

  `weight` decimal(10, 2),

  `tm_id` bigint,

  `category3_id` bigint,

  `sku_default_img` varchar(300),

  `is_sale` boolean,

  `create_time` timestamp(0),

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_sku_info',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_sku_info');

9.2.14 SKU Sale Attribute Value Table ods_sku_sale_attr_value

CREATE TABLE `hudi_ods`.`ods_sku_sale_attr_value`  (

  `id` bigint,

  `sku_id` bigint,

  `spu_id` int,

  `sale_attr_value_id` bigint,

  `sale_attr_id` bigint,

  `sale_attr_name` varchar(30),

  `sale_attr_value_name` varchar(30),

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_sku_sale_attr_value',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_sku_sale_attr_value');

9.2.15 SPU Table ods_spu_info

CREATE TABLE `hudi_ods`.`ods_spu_info`  (

  `id` bigint,

  `spu_name` varchar(200),

  `description` varchar(1000),

  `category3_id` bigint,

  `tm_id` bigint,

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_spu_info',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_spu_info');

9.2.16 Comment Table ods_comment_info

CREATE TABLE `hudi_ods`.`ods_comment_info`  (

  `id` bigint,

  `user_id` bigint,

  `nick_name` varchar(20),

  `head_img` varchar(200),

  `sku_id` bigint,

  `spu_id` bigint,

  `order_id` bigint,

  `appraise` varchar(10),

  `comment_txt` varchar(2000),

  `create_time` timestamp(0),

  `operate_time` timestamp(0),

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_comment_info',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_comment_info');

9.2.17 Coupon Usage Table ods_coupon_use

CREATE TABLE `hudi_ods`.`ods_coupon_use`  (

  `id` bigint,

  `coupon_id` bigint,

  `user_id` bigint,

  `order_id` bigint,

  `coupon_status` varchar(10),

  `create_time` timestamp(0),

  `get_time` timestamp(0),

  `using_time` timestamp(0),

  `used_time` timestamp(0),

  `expire_time` timestamp(0),

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_coupon_use',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_coupon_use');

9.2.18 Favorites Table ods_favor_info

CREATE TABLE `hudi_ods`.`ods_favor_info`  (

  `id` bigint,

  `user_id` bigint,

  `sku_id` bigint,

  `spu_id` bigint,

  `is_cancel` varchar(1),

  `create_time` timestamp(0),

  `cancel_time` timestamp(0),

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_favor_info',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_favor_info');

9.2.19 Order Detail Table ods_order_detail

CREATE TABLE `hudi_ods`.`ods_order_detail`  (

  `id` bigint,

  `order_id` bigint,

  `sku_id` bigint,

  `sku_name` varchar(200),

  `img_url` varchar(200),

  `order_price` decimal(10, 2),

  `sku_num` bigint,

  `create_time` timestamp(0),

  `source_type` varchar(20),

  `source_id` bigint,

  `split_total_amount` decimal(16, 2),

  `split_activity_amount` decimal(16, 2),

  `split_coupon_amount` decimal(16, 2),

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_order_detail',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_order_detail');

9.2.20 Order Detail Activity Link Table ods_order_detail_activity

CREATE TABLE `hudi_ods`.`ods_order_detail_activity`  (

  `id` bigint,

  `order_id` bigint,

  `order_detail_id` bigint,

  `activity_id` bigint,

  `activity_rule_id` bigint,

  `sku_id` bigint,

  `create_time` timestamp(0),

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_order_detail_activity',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_order_detail_activity');

9.2.21 Order Detail Coupon Link Table ods_order_detail_coupon

CREATE TABLE `hudi_ods`.`ods_order_detail_coupon`  (

  `id` bigint,

  `order_id` bigint,

  `order_detail_id` bigint,

  `coupon_id` bigint,

  `coupon_use_id` bigint,

  `sku_id` bigint,

  `create_time` timestamp(0),

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_order_detail_coupon',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_order_detail_coupon');

9.2.22 Order Table ods_order_info

CREATE TABLE `hudi_ods`.`ods_order_info`  (

  `id` bigint,

  `consignee` varchar(100),

  `consignee_tel` varchar(20),

  `total_amount` decimal(10, 2),

  `order_status` varchar(20),

  `user_id` bigint,

  `payment_way` varchar(20),

  `delivery_address` varchar(1000),

  `order_comment` varchar(200),

  `out_trade_no` varchar(50),

  `trade_body` varchar(200),

  `create_time` timestamp(0),

  `operate_time` timestamp(0),

  `expire_time` timestamp(0),

  `process_status` varchar(20),

  `tracking_no` varchar(100),

  `parent_order_id` bigint,

  `img_url` varchar(200),

  `province_id` int,

  `activity_reduce_amount` decimal(16, 2),

  `coupon_reduce_amount` decimal(16, 2),

  `original_total_amount` decimal(16, 2),

  `feight_fee` decimal(16, 2),

  `feight_fee_reduce` decimal(16, 2),

  `refundable_time` timestamp(0),

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_order_info',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_order_info');

9.2.23 Order Refund Table ods_order_refund_info

CREATE TABLE `hudi_ods`.`ods_order_refund_info`  (

  `id` bigint,

  `user_id` bigint,

  `order_id` bigint,

  `sku_id` bigint,

  `refund_type` varchar(20),

  `refund_num` bigint,

  `refund_amount` decimal(16, 2),

  `refund_reason_type` varchar(200),

  `refund_reason_txt` varchar(20),

  `refund_status` varchar(10),

  `create_time` timestamp(0),

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_order_refund_info',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_order_refund_info');

9.2.24 Order Status Log Table ods_order_status_log

CREATE TABLE `hudi_ods`.`ods_order_status_log`  (

  `id` bigint,

  `order_id` bigint,

  `order_status` varchar(11),

  `operate_time` timestamp(0),

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_order_status_log',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_order_status_log');

9.2.25 Payment Table ods_payment_info

CREATE TABLE `hudi_ods`.`ods_payment_info`  (

  `id` int,

  `out_trade_no` varchar(50),

  `order_id` bigint,

  `user_id` bigint,

  `payment_type` varchar(20),

  `trade_no` varchar(50),

  `total_amount` decimal(10, 2),

  `subject` varchar(200),

  `payment_status` varchar(20),

  `create_time` timestamp(0),

  `callback_time` timestamp(0),

  `callback_content` string,

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_payment_info',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_payment_info');

9.2.26 Refund Payment Table ods_refund_payment

CREATE TABLE `hudi_ods`.`ods_refund_payment`  (

  `id` int,

  `out_trade_no` varchar(50),

  `order_id` bigint,

  `sku_id` bigint,

  `payment_type` varchar(20),

  `trade_no` varchar(50),

  `total_amount` decimal(10, 2),

  `subject` varchar(200),

  `refund_status` varchar(30),

  `create_time` timestamp(0),

  `callback_time` timestamp(0),

  `callback_content` string,

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_refund_payment',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_refund_payment');

9.2.27 User Table ods_user_info

CREATE TABLE `hudi_ods`.`ods_user_info`  (

  `id` bigint,

  `login_name` varchar(200),

  `nick_name` varchar(200),

  `passwd` varchar(200),

  `name` varchar(200),

  `phone_num` varchar(200),

  `email` varchar(200),

  `head_img` varchar(200),

  `user_level` varchar(200),

  `birthday` date,

  `gender` varchar(1),

  `create_time` timestamp(0),

  `operate_time` timestamp(0),

  `status` varchar(200),

  `ts` timestamp(3),

  `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_user_info',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ods',

    'hive_sync.table'='ods_user_info');

9.2.28 Data Loading

(1) We insert the data captured by Flink CDC into the corresponding table of the hudi_ods database. Because hive_sync is configured for every table, each table also has a mapped copy in Hive's hive_ods database.
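Because these are MERGE_ON_READ tables, Hudi's Hive sync typically registers two tables per source table in hive_ods: a read-optimized view suffixed _ro and a real-time view suffixed _rt (exact naming depends on the Hudi version). A quick sanity check from Hive, as a sketch:

-- Run in Hive (Beeline): confirm the synced mapping, then peek at the real-time view
SHOW TABLES IN hive_ods;
SELECT * FROM hive_ods.ods_activity_info_rt LIMIT 10;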

(2) Execute the insert statements in Flink SQL. Each statement launches its own Flink job, and data flows into the ODS layer in real time.
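Optionally, several INSERT statements can be bundled into a single Flink job with a statement set, which saves cluster resources. A sketch for the first two tables (depending on the Flink version, the SQL client accepts EXECUTE STATEMENT SET BEGIN ... END; or BEGIN STATEMENT SET; ... END;):

EXECUTE STATEMENT SET
BEGIN
INSERT INTO hudi_ods.ods_activity_info
select *, LOCALTIMESTAMP as ts, cast(CURRENT_DATE as string) as dt
from flink_cdc.activity_info_cdc;
INSERT INTO hudi_ods.ods_activity_rule
select *, LOCALTIMESTAMP as ts, cast(CURRENT_DATE as string) as dt
from flink_cdc.activity_rule_cdc;
END;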

-- Load data into ods_activity_info

INSERT INTO hudi_ods.ods_activity_info

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.activity_info_cdc;

-- select * from hudi_ods.ods_activity_info;

-- Load data into ods_activity_rule

INSERT INTO hudi_ods.ods_activity_rule

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.activity_rule_cdc;

-- select * from hudi_ods.ods_activity_rule;

-- Load data into ods_base_category1

INSERT INTO hudi_ods.ods_base_category1

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.base_category1_cdc;

-- select * from hudi_ods.ods_base_category1;

-- Load data into ods_base_category2

INSERT INTO hudi_ods.ods_base_category2

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.base_category2_cdc;

-- select * from hudi_ods.ods_base_category2;

-- Load data into ods_base_category3

INSERT INTO hudi_ods.ods_base_category3

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.base_category3_cdc;

-- select * from hudi_ods.ods_base_category3;

-- Load data into ods_base_dic

INSERT INTO hudi_ods.ods_base_dic

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.base_dic_cdc;

-- select * from hudi_ods.ods_base_dic;

-- Load data into ods_base_province

INSERT INTO hudi_ods.ods_base_province

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.base_province_cdc;

-- select * from hudi_ods.ods_base_province;

-- Load data into ods_base_region

INSERT INTO hudi_ods.ods_base_region

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.base_region_cdc;

-- select * from hudi_ods.ods_base_region;

-- Load data into ods_base_trademark

INSERT INTO hudi_ods.ods_base_trademark

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.base_trademark_cdc;

-- select * from hudi_ods.ods_base_trademark;

-- Load data into ods_cart_info

INSERT INTO hudi_ods.ods_cart_info

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.cart_info_cdc;

-- select * from hudi_ods.ods_cart_info;

-- Load data into ods_comment_info

INSERT INTO hudi_ods.ods_comment_info

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.comment_info_cdc;

-- select * from hudi_ods.ods_comment_info;

-- Load data into ods_coupon_info

INSERT INTO hudi_ods.ods_coupon_info

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.coupon_info_cdc;

-- select * from hudi_ods.ods_coupon_info;

-- Load data into ods_coupon_use

INSERT INTO hudi_ods.ods_coupon_use

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.coupon_use_cdc;

-- select * from hudi_ods.ods_coupon_use;

-- Load data into ods_favor_info

INSERT INTO hudi_ods.ods_favor_info

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.favor_info_cdc;

-- select * from hudi_ods.ods_favor_info;

-- Load data into ods_order_detail

INSERT INTO hudi_ods.ods_order_detail

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.order_detail_cdc;

-- select * from hudi_ods.ods_order_detail;

-- Load data into ods_order_detail_activity

INSERT INTO hudi_ods.ods_order_detail_activity

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.order_detail_activity_cdc;

-- select * from hudi_ods.ods_order_detail_activity;

-- Load data into ods_order_detail_coupon

INSERT INTO hudi_ods.ods_order_detail_coupon

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.order_detail_coupon_cdc;

-- select * from hudi_ods.ods_order_detail_coupon;

-- Load data into ods_order_info

INSERT INTO hudi_ods.ods_order_info

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.order_info_cdc;

-- select * from hudi_ods.ods_order_info;

-- Load data into ods_order_refund_info

INSERT INTO hudi_ods.ods_order_refund_info

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.order_refund_info_cdc;

-- select * from hudi_ods.ods_order_refund_info;

-- Load data into ods_order_status_log

INSERT INTO hudi_ods.ods_order_status_log

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.order_status_log_cdc;

-- select * from hudi_ods.ods_order_status_log;

-- Load data into ods_payment_info

INSERT INTO hudi_ods.ods_payment_info

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.payment_info_cdc;

-- select * from hudi_ods.ods_payment_info;

-- Load data into ods_refund_payment

INSERT INTO hudi_ods.ods_refund_payment

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.refund_payment_cdc;

-- select * from hudi_ods.ods_refund_payment;

-- Load data into ods_sku_attr_value

INSERT INTO hudi_ods.ods_sku_attr_value

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.sku_attr_value_cdc;

-- select * from hudi_ods.ods_sku_attr_value;

-- Load data into ods_sku_info

INSERT INTO hudi_ods.ods_sku_info

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.sku_info_cdc;

-- select * from hudi_ods.ods_sku_info;

-- Load data into ods_sku_sale_attr_value

INSERT INTO hudi_ods.ods_sku_sale_attr_value

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.sku_sale_attr_value_cdc;

-- select * from hudi_ods.ods_sku_sale_attr_value;

-- Load data into ods_spu_info

INSERT INTO hudi_ods.ods_spu_info

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.spu_info_cdc;

-- select * from hudi_ods.ods_spu_info;

-- Load data into ods_user_info

INSERT INTO hudi_ods.ods_user_info

select *

       ,LOCALTIMESTAMP as ts

       ,cast(CURRENT_DATE as string) as dt

from flink_cdc.user_info_cdc;

-- select * from hudi_ods.ods_user_info;


Chapter 10: The DIM Layer of the Lakehouse

Design points of the DIM layer:

1) The DIM layer is designed according to dimensional modeling theory and stores the dimension tables of the dimensional model.

2) The date dimension is loaded via file import.

10.1 Product Dimension Table dim_sku

10.1.1 Table Creation Statement

CREATE TABLE hudi_dim.dim_sku

(

    `id`                   BIGINT,

    `price`                DECIMAL(10, 0),

    `sku_name`             VARCHAR(200),

    `sku_desc`             VARCHAR(2000),

    `weight`               DECIMAL(10,2),

    `is_sale`              BOOLEAN,

    `spu_id`               BIGINT,

    `spu_name`             VARCHAR(200),

    `category3_id`         BIGINT,

    `category3_name`       VARCHAR(200),

    `category2_id`         BIGINT,

    `category2_name`       VARCHAR(200),

    `category1_id`         BIGINT,

    `category1_name`       VARCHAR(10),

    `tm_id`                BIGINT,

    `tm_name`              VARCHAR(100),

    `sku_attr_values`      MULTISET<string>,

    `sku_sale_attr_values` MULTISET<string>,

    `create_time`          TIMESTAMP(0),

    `ts` timestamp(3),

    `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dim/dim_sku',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dim',

    'hive_sync.table'='dim_sku');

10.1.2 Data Loading

insert into hudi_dim.dim_sku

select

    sku.id,

    sku.price,

    sku.sku_name,

    sku.sku_desc,

    sku.weight,

    sku.is_sale,

    sku.spu_id,

    spu.spu_name,

    sku.category3_id,

    c3.name,

    c3.category2_id,

    c2.name,

    c2.category1_id,

    c1.name,

    sku.tm_id,

    tm.tm_name,

    attr.attrs,

    sale_attr.sale_attrs,

    sku.create_time,

    LOCALTIMESTAMP as ts,

    cast(CURRENT_DATE as string) as dt

from hudi_ods.ods_sku_info sku

left join hudi_ods.ods_spu_info spu on sku.spu_id=spu.id

left join hudi_ods.ods_base_category3 c3 on sku.category3_id=c3.id

left join hudi_ods.ods_base_category2 c2 on c3.category2_id=c2.id

left join hudi_ods.ods_base_category1 c1 on c2.category1_id=c1.id

left join hudi_ods.ods_base_trademark tm on sku.tm_id=tm.id

left join

(

    select

        sku_id,

        collect(concat_ws('|',cast(attr_id as string),cast(value_id as string),attr_name,value_name)) attrs

    from hudi_ods.ods_sku_attr_value

    group by sku_id

) attr on sku.id=attr.sku_id

left join

(

    select

        sku_id,

        collect(concat_ws('|',cast(sale_attr_id as string),cast(sale_attr_value_id as string),sale_attr_name,sale_attr_value_name)) sale_attrs

    from hudi_ods.ods_sku_sale_attr_value

    group by sku_id

) sale_attr on sku.id=sale_attr.sku_id;
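The collect() calls above are Flink's built-in multiset aggregate function, which is why sku_attr_values and sku_sale_attr_values are declared as MULTISET<STRING>. As a quick sanity check (a sketch), CARDINALITY counts the elements collected per SKU:

select id, sku_name, CARDINALITY(sku_attr_values) as attr_cnt
from hudi_dim.dim_sku;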

10.2 Coupon Dimension Table dim_coupon

10.2.1 Table Creation Statement

CREATE TABLE hudi_dim.dim_coupon

(

    `id`               BIGINT,

    `coupon_name`      VARCHAR(100),

    `coupon_type_code` VARCHAR(10),

    `coupon_type_name` VARCHAR(100),

    `condition_amount` DECIMAL(10, 2),

    `condition_num`    BIGINT,

    `activity_id`      BIGINT,

    `benefit_amount`   DECIMAL(16, 2),

    `benefit_discount` DECIMAL(10, 2),

    `benefit_rule`     STRING,

    `create_time`      TIMESTAMP(0),

    `range_type_code`  VARCHAR(10),

    `range_type_name`  VARCHAR(100),

    `limit_num`        INT,

    `taken_count`      INT,

    `start_time`       TIMESTAMP(0),

    `end_time`         TIMESTAMP(0),

    `operate_time`     TIMESTAMP(0),

    `expire_time`      TIMESTAMP(0),

    `ts` timestamp(3),

    `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dim/dim_coupon',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dim',

    'hive_sync.table'='dim_coupon');

10.2.2 Data Loading

insert into hudi_dim.dim_coupon

select

    id,

    coupon_name,

    coupon_type,

    coupon_dic.dic_name,

    condition_amount,

    condition_num,

    activity_id,

    benefit_amount,

    benefit_discount,

    case coupon_type

        when '3201' then concat('满',cast(condition_amount as string),'元减',cast(benefit_amount as string),'元')

        when '3202' then concat('满',cast(condition_num as string),'件打',cast(10*(1-benefit_discount) as string),'折')

        when '3203' then concat('减',cast(benefit_amount as string),'元')

    end benefit_rule,

    create_time,

    range_type,

    range_dic.dic_name,

    limit_num,

    taken_count,

    start_time,

    end_time,

    operate_time,

    expire_time,

    LOCALTIMESTAMP as ts,

    cast(CURRENT_DATE as string) as dt

from

(

    select

        id,

        coupon_name,

        coupon_type,

        condition_amount,

        condition_num,

        activity_id,

        benefit_amount,

        benefit_discount,

        create_time,

        range_type,

        limit_num,

        taken_count,

        start_time,

        end_time,

        operate_time,

        expire_time

    from hudi_ods.ods_coupon_info

)ci

left join

(

    select

        dic_code,

        dic_name

    from hudi_ods.ods_base_dic

    where parent_code='32'

)coupon_dic

on ci.coupon_type=coupon_dic.dic_code

left join

(

    select

        dic_code,

        dic_name

    from hudi_ods.ods_base_dic

    where parent_code='33'

)range_dic

on ci.range_type=range_dic.dic_code;

10.3 Activity Dimension Table dim_activity

10.3.1 Table Creation Statement

CREATE TABLE hudi_dim.dim_activity

(

    `activity_rule_id`   int,

    `activity_id`        bigint,

    `activity_name`      varchar(200),

    `activity_type_code` varchar(20),

    `activity_type_name` varchar(100),

    `activity_desc`      varchar(2000),

    `start_time`         timestamp(0),

    `end_time`           timestamp(0),

    `create_time`        timestamp(0),

    `condition_amount`   DECIMAL(16, 2),

    `condition_num`      BIGINT,

    `benefit_amount`     DECIMAL(16, 2),

    `benefit_discount`   DECIMAL(10, 2),

    `benefit_rule`       STRING,

    `benefit_level`      bigint,

    `ts` timestamp(3),

    `dt` string             

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dim/dim_activity',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'activity_rule_id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dim',

    'hive_sync.table'='dim_activity');

10.3.2 Data Loading

insert into hudi_dim.dim_activity

select

    rule.id,

    info.id,

    activity_name,

    rule.activity_type,

    dic.dic_name,

    activity_desc,

    start_time,

    end_time,

    create_time,

    condition_amount,

    condition_num,

    benefit_amount,

    benefit_discount,

    case rule.activity_type

        when '3101' then concat('满',cast(condition_amount as string),'元减',cast(benefit_amount as string),'元')

        when '3102' then concat('满',cast(condition_amount as string),'件打',cast(10*(1-benefit_discount) as string),'折')

        when '3103' then concat('打',cast(10*(1-benefit_discount) as string),'折')

    end benefit_rule,

    benefit_level,

    LOCALTIMESTAMP as ts,

    cast(CURRENT_DATE as string) as dt

from hudi_ods.ods_activity_rule rule

left join hudi_ods.ods_activity_info info

on rule.activity_id=info.id

left join

(

    select

        dic_code,

        dic_name

    from hudi_ods.ods_base_dic

    where parent_code='31'

)dic

on rule.activity_type=dic.dic_code;

10.4 Region Dimension Table dim_province

10.4.1 Table Creation Statement

CREATE TABLE hudi_dim.dim_province

(

    `id`            bigint,

    `province_name` varchar(20),

    `area_code`     varchar(20),

    `iso_code`      varchar(20),

    `iso_3166_2`    varchar(20),

    `region_id`     varchar(20),

    `region_name`   varchar(20),

    `ts` timestamp(3),

    `dt` string

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dim/dim_province',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dim',

    'hive_sync.table'='dim_province');

10.4.2 Data Loading

insert into hudi_dim.dim_province

select

    province.id,

    province.name,

    province.area_code,

    province.iso_code,

    province.iso_3166_2,

    region_id,

    region_name,

    LOCALTIMESTAMP as ts,

    cast(CURRENT_DATE as string) as dt

from hudi_ods.ods_base_province province

left join hudi_ods.ods_base_region region

on province.region_id=region.id;

10.5 Date Dimension Table dim_date

10.5.1 Table Creation Statement

CREATE TABLE hudi_dim.dim_date

(

    `date_id`    STRING,

    `week_id`    STRING,

    `week_day`   STRING,

    `day`        STRING,

    `month`      STRING,

    `quarter`    STRING,

    `year`       STRING,

    `is_workday` STRING,

    `holiday_id` STRING,

    `ts`         TIMESTAMP(3)

) WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dim/dim_date',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'date_id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '1',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '1',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dim',

    'hive_sync.table'='dim_date');

10.5.2 Data Loading

Normally the date dimension data does not come from the business systems; it is written by hand. Because its contents are predictable, there is no need for a daily load, and a whole year of data can usually be imported in one pass.

(1) Map the file on HDFS to a table

CREATE TABLE myflink.date_file_source (

    `date_id`    STRING,

    `week_id`    STRING,

    `week_day`   STRING,

    `day`        STRING,

    `month`      STRING,

    `quarter`    STRING,

    `year`       STRING,

    `is_workday` STRING,

    `holiday_id` STRING

) WITH (

  'connector' = 'filesystem',           

  'path' = 'hdfs://hadoop102:8020/tmp_data/date_info.csv',

  'format' = 'csv'

);

(2) Upload the data file to the temporary table path /tmp_data/date_info.csv on HDFS
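Each line of date_info.csv is expected to carry the nine declared columns in order; a hypothetical sample row (values are illustrative only, with holiday_id left empty):

2023-06-18,25,7,18,6,2,2023,0,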

(3) Execute the following statement to load the data into the date dimension table

insert into hudi_dim.dim_date

select

    *,

    LOCALTIMESTAMP as ts

from myflink.date_file_source;

10.6 User Dimension Table dim_user_info

10.6.1 Table Creation Statement

CREATE TABLE hudi_dim.dim_user_info

(

  `id` bigint,

  `login_name` varchar(200),

  `nick_name` varchar(200),

  `name` varchar(200),

  `phone_num` varchar(200),

  `email` varchar(200),

  `user_level` varchar(200),

  `birthday` date,

  `gender` varchar(1),

  `create_time` timestamp(0),

  `operate_time` timestamp(0),

  `start_time` timestamp(0),

  `ts` timestamp(3),

  `dt` string             

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dim/dim_user_info',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dim',

    'hive_sync.table'='dim_user_info');

10.6.2 Data Loading

insert into hudi_dim.dim_user_info

select

    id,

    login_name,

    nick_name,

    md5(name) name,

    md5(if(regexp(phone_num,'^(13[0-9]|14[01456879]|15[0-35-9]|16[2567]|17[0-8]|18[0-9]|19[0-35-9])\\d{8}$'),phone_num,CAST(NULL AS STRING))) phone_num,

    md5(if(regexp(email,'^[a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+(\\.[a-zA-Z0-9_-]+)+$'),email,CAST(NULL AS STRING))) email,

    user_level,

    birthday,

    gender,

    create_time,

    operate_time,

    nvl(operate_time,create_time) as start_time,

    LOCALTIMESTAMP as ts,

    date_format(create_time,'yyyy-MM-dd') as dt

from hudi_ods.ods_user_info;
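Note how the load masks PII: a field is MD5-hashed only when it passes a format check, and becomes NULL otherwise, so malformed values are never leaked even in hashed form. A minimal sketch to exercise the phone-number branch in isolation, assuming the same REGEXP and MD5 functions used above:

select md5(if(regexp('13812345678','^(13[0-9]|14[01456879]|15[0-35-9]|16[2567]|17[0-8]|18[0-9]|19[0-35-9])\\d{8}$'),'13812345678',cast(null as string)));
-- a well-formed 11-digit number is hashed; an invalid input would yield NULL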

Chapter 11 DWD Layer of the Lakehouse

Key design points of the DWD layer:

1) The DWD layer follows dimensional modeling theory; it stores the fact tables of the dimensional model.

2) The DWD tables are Hudi MERGE_ON_READ tables, so data is physically stored as Parquet base files plus Avro log files (rather than the ORC + Snappy format of a traditional Hive warehouse).

3) DWD table names follow the convention dwd_<data domain>_<table name>_<single-partition incremental/full flag (inc/full)>.

11.1 Trade Domain Cart-Add Transaction Fact Table dwd_trade_cart_add

11.1.1 Table Creation Statement

CREATE TABLE hudi_dwd.dwd_trade_cart_add

(

    `id`               BIGINT,

    `user_id`          VARCHAR(200),

    `sku_id`           BIGINT,

    `date_id`          STRING,

    `create_time`      TIMESTAMP(0),

    `source_id`        BIGINT,

    `source_type_code` VARCHAR(20),

    `source_type_name` varchar(100),

    `sku_num`          INT,

    `ts`               TIMESTAMP(3),

    `dt`               STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dwd/dwd_trade_cart_add',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dwd',

    'hive_sync.table'='dwd_trade_cart_add'

);

11.1.2 Data Loading

insert into hudi_dwd.dwd_trade_cart_add

select

    cart.id,

    cart.user_id,

    cart.sku_id,

    cart.dt date_id,

    cart.create_time,

    cart.source_id,

    cart.source_type,

    dic.source_type_name,

    cart.sku_num,

    cart.ts,

    cart.dt

from hudi_ods.ods_cart_info cart

left join

(

    select

        dic_code,

        dic_name source_type_name

    from hudi_ods.ods_base_dic

    where parent_code='24'

)dic

on cart.source_type=dic.dic_code;
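The dictionary subquery decodes source_type codes into human-readable names; parent_code='24' limits ods_base_dic to the source-type entries. If source_type_name comes back NULL after the left join, inspecting that slice of the dictionary is a quick diagnostic (a suggested check, not part of the original pipeline):

select dic_code, dic_name from hudi_ods.ods_base_dic where parent_code='24';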

11.2 Trade Domain Order Placement Transaction Fact Table dwd_trade_order_detail

11.2.1 Table Creation Statement

CREATE TABLE hudi_dwd.dwd_trade_order_detail

(

    `id`                    BIGINT,

    `order_id`              BIGINT,

    `user_id`               BIGINT,

    `sku_id`                BIGINT,

    `province_id`           INT,

    `activity_id`           BIGINT,

    `activity_rule_id`      BIGINT,

    `coupon_id`             BIGINT,

    `date_id`               STRING,

    `create_time`           TIMESTAMP(0),

    `source_id`             BIGINT,

    `source_type_code`      VARCHAR(20),

    `source_type_name`      VARCHAR(100),

    `sku_num`               BIGINT,

    `split_original_amount` DECIMAL(16, 2),

    `split_activity_amount` DECIMAL(16, 2),

    `split_coupon_amount`   DECIMAL(16, 2),

    `split_total_amount`    DECIMAL(16, 2),

    `ts`                    TIMESTAMP(3),

    `dt`                    STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dwd/dwd_trade_order_detail',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dwd',

    'hive_sync.table'='dwd_trade_order_detail'

);

11.2.2 Data Loading

insert into hudi_dwd.dwd_trade_order_detail

select

    od.id,

    od.order_id,

    oi.user_id,

    od.sku_id,

    oi.province_id,

    act.activity_id,

    act.activity_rule_id,

    cou.coupon_id,

    od.dt date_id,

    od.create_time,

    od.source_id,

    od.source_type,

    dic.dic_name,

    od.sku_num,

    od.sku_num * od.order_price split_original_amount,

    nvl(od.split_activity_amount,0.0) split_activity_amount,

    nvl(od.split_coupon_amount,0.0) split_coupon_amount,

    od.split_total_amount,

    od.ts,

    od.dt

from hudi_ods.ods_order_detail od

left join

(

    select

        id,

        user_id,

        province_id

    from hudi_ods.ods_order_info

) oi

on od.order_id = oi.id

left join

(

    select

        order_detail_id,

        activity_id,

        activity_rule_id

    from hudi_ods.ods_order_detail_activity

) act

on od.id = act.order_detail_id

left join

(

    select

        order_detail_id,

        coupon_id

    from hudi_ods.ods_order_detail_coupon

) cou

on od.id = cou.order_detail_id

left join

(

    select

        dic_code,

        dic_name

    from hudi_ods.ods_base_dic

    where parent_code='24'

)dic

on od.source_type=dic.dic_code;

11.3 Trade Domain Successful Payment Transaction Fact Table dwd_trade_pay_detail_suc

11.3.1 Table Creation Statement

CREATE TABLE hudi_dwd.dwd_trade_pay_detail_suc

(

    `id`                    BIGINT,

    `order_id`              BIGINT,

    `user_id`               BIGINT,

    `sku_id`                BIGINT,

    `province_id`           INT,

    `activity_id`           BIGINT,

    `activity_rule_id`      BIGINT,

    `coupon_id`             BIGINT,

    `payment_type_code`     VARCHAR(20),

    `payment_type_name`     VARCHAR(100),

    `date_id`               STRING,

    `callback_time`         TIMESTAMP(0),

    `source_id`             BIGINT,

    `source_type_code`      VARCHAR(20),

    `source_type_name`      VARCHAR(100),

    `sku_num`               BIGINT,

    `split_original_amount` DECIMAL(16, 2),

    `split_activity_amount` DECIMAL(16, 2),

    `split_coupon_amount`   DECIMAL(16, 2),

    `split_payment_amount`  DECIMAL(16, 2),

    `ts`                    TIMESTAMP(3),

    `dt`                    STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dwd/dwd_trade_pay_detail_suc',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dwd',

    'hive_sync.table'='dwd_trade_pay_detail_suc'

);

11.3.2 Data Loading

insert into hudi_dwd.dwd_trade_pay_detail_suc

select

    od.id,

    od.order_id,

    pi.user_id,

    od.sku_id,

    oi.province_id,

    act.activity_id,

    act.activity_rule_id,

    cou.coupon_id,

    pi.payment_type,

    pay_dic.dic_name,

    date_format(pi.callback_time,'yyyy-MM-dd') date_id,

    pi.callback_time,

    od.source_id,

    od.source_type,

    src_dic.dic_name,

    od.sku_num,

    od.sku_num * od.order_price split_original_amount,

    nvl(od.split_activity_amount,0.0) split_activity_amount,

    nvl(od.split_coupon_amount,0.0) split_coupon_amount,

    od.split_total_amount,

    od.ts,

    od.dt

from hudi_ods.ods_order_detail od

join

(

    select

        user_id,

        order_id,

        payment_type,

        callback_time

    from hudi_ods.ods_payment_info

    where payment_status='1602'

) pi

on od.order_id=pi.order_id

left join

(

    select

        id,

        province_id

    from hudi_ods.ods_order_info

) oi

on od.order_id = oi.id

left join

(

    select

        order_detail_id,

        activity_id,

        activity_rule_id

    from hudi_ods.ods_order_detail_activity

) act

on od.id = act.order_detail_id

left join

(

    select

        order_detail_id,

        coupon_id

    from hudi_ods.ods_order_detail_coupon

) cou

on od.id = cou.order_detail_id

left join

(

    select

        dic_code,

        dic_name

    from hudi_ods.ods_base_dic

    where parent_code='11'

) pay_dic

on pi.payment_type=pay_dic.dic_code

left join

(

    select

        dic_code,

        dic_name

    from hudi_ods.ods_base_dic

    where parent_code='24'

)src_dic

on od.source_type=src_dic.dic_code;

11.4 Trade Domain Trade Flow Accumulating Snapshot Fact Table dwd_trade_trade_flow_acc

11.4.1 Table Creation Statement

CREATE TABLE hudi_dwd.dwd_trade_trade_flow_acc

(

    `order_id`              BIGINT,

    `user_id`               BIGINT,

    `province_id`           INT,

    `order_date_id`         STRING,

    `order_time`            TIMESTAMP(0),

    `payment_date_id`       STRING,

    `payment_time`          TIMESTAMP(0),

    `finish_date_id`        STRING,

    `finish_time`           TIMESTAMP(0),

    `order_original_amount` DECIMAL(16, 2),

    `order_activity_amount` DECIMAL(16, 2),

    `order_coupon_amount`   DECIMAL(16, 2),

    `order_total_amount`    DECIMAL(10, 2),

    `payment_amount`        DECIMAL(10, 2),

    `ts`                    TIMESTAMP(3),

    `dt`                    STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dwd/dwd_trade_trade_flow_acc',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'order_id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dwd',

    'hive_sync.table'='dwd_trade_trade_flow_acc'

);

11.4.2 Data Loading

insert into hudi_dwd.dwd_trade_trade_flow_acc

select

    oi.id,

    oi.user_id,

    oi.province_id,

    date_format(oi.create_time,'yyyy-MM-dd'),

    oi.create_time,

    date_format(pi.callback_time,'yyyy-MM-dd'),

    pi.callback_time,

    date_format(log.operate_time,'yyyy-MM-dd'),

    log.operate_time,

    oi.original_total_amount,

    oi.activity_reduce_amount,

    oi.coupon_reduce_amount,

    oi.total_amount,

    nvl(pi.payment_amount,0.0),

    oi.ts,

    oi.dt

from hudi_ods.ods_order_info oi

left join

(

    select

        order_id,

        callback_time,

        total_amount payment_amount

    from hudi_ods.ods_payment_info

    where payment_status='1602'

)pi

on oi.id=pi.order_id

left join

(

    select

        order_id,

        operate_time

    from hudi_ods.ods_order_status_log

    where order_status='1004'

)log

on oi.id=log.order_id;
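Because the record key of this table is order_id and the precombine field is ts, re-running the load after later milestones occur upserts the existing row instead of appending a new one, which is exactly how an accumulating snapshot is maintained on Hudi. A hypothetical progression for a single order:

-- first load (order placed, not yet paid):  order_id=1001, payment_time=NULL, finish_time=NULL
-- after the payment callback:               order_id=1001, payment_time set,  finish_time=NULL
-- after order_status reaches '1004':        order_id=1001, payment_time set,  finish_time set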

11.5 Tool Domain Coupon Usage (Payment) Transaction Fact Table dwd_tool_coupon_used

11.5.1 Table Creation Statement

CREATE TABLE hudi_dwd.dwd_tool_coupon_used

(

    `id`           BIGINT,

    `coupon_id`    BIGINT,

    `user_id`      BIGINT,

    `order_id`     BIGINT,

    `date_id`      STRING,

    `payment_time` TIMESTAMP(0),

    `ts`           TIMESTAMP(3),

    `dt`           STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dwd/dwd_tool_coupon_used',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dwd',

    'hive_sync.table'='dwd_tool_coupon_used'

);

11.5.2 Data Loading

insert into hudi_dwd.dwd_tool_coupon_used

select

    id,

    coupon_id,

    user_id,

    order_id,

    date_format(used_time,'yyyy-MM-dd') date_id,

    used_time,

    ts,

    dt

from hudi_ods.ods_coupon_use

where used_time is not null;

11.6 Interaction Domain Item Favorites Transaction Fact Table dwd_interaction_favor_add

11.6.1 Table Creation Statement

CREATE TABLE hudi_dwd.dwd_interaction_favor_add

(

    `id`          BIGINT,

    `user_id`     BIGINT,

    `sku_id`      BIGINT,

    `date_id`     STRING,

    `create_time` TIMESTAMP(0),

    `ts`          TIMESTAMP(3),

    `dt`          STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dwd/dwd_interaction_favor_add',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dwd',

    'hive_sync.table'='dwd_interaction_favor_add'

);

11.6.2 Data Loading

insert into hudi_dwd.dwd_interaction_favor_add

select

    id,

    user_id,

    sku_id,

    date_format(create_time,'yyyy-MM-dd') date_id,

    create_time,

    ts,

    dt

from hudi_ods.ods_favor_info;

11.7 Traffic Domain Page View Transaction Fact Table dwd_traffic_page_view

11.7.1 Table Creation Statement

CREATE TABLE hudi_dwd.dwd_traffic_page_view

(

    `uuid`           STRING,

    `province_id`    BIGINT,

    `brand`          STRING,

    `channel`        STRING,

    `is_new`         STRING,

    `model`          STRING,

    `mid_id`         STRING,

    `operate_system` STRING,

    `user_id`        STRING,

    `version_code`   STRING,

    `page_item`      STRING,

    `page_item_type` STRING,

    `last_page_id`   STRING,

    `page_id`        STRING,

    `source_type`    STRING,

    `date_id`        STRING,

    `view_time`      STRING,

    `session_id`     STRING,

    `during_time`    STRING,

    `ts`             BIGINT,

    `dt`             STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dwd/dwd_traffic_page_view',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'uuid',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dwd',

    'hive_sync.table'='dwd_traffic_page_view'

);

11.7.2 Data Loading

insert into hudi_dwd.dwd_traffic_page_view

select

    uuid,

    province_id,

    brand,

    channel,

    is_new,

    model,

    mid_id,

    operate_system,

    user_id,

    version_code,

    page_item,

    page_item_type,

    last_page_id,

    page_id,

    source_type,

    date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,

    date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') view_time,

    session_id,

    during_time,

    ts,

    dt

from

(

    select

        uuid,

        common.ar area_code,

        common.ba brand,

        common.ch channel,

        common.is_new is_new,

        common.md model,

        common.mid mid_id,

        common.os operate_system,

        common.uid user_id,

        common.vc version_code,

        page.during_time,

        page.item page_item,

        page.item_type page_item_type,

        page.last_page_id,

        page.page_id,

        page.source_type,

        ts,

        concat(common.mid,'-',last_value(if(page.last_page_id is null,cast(ts as string),cast(null as string))) over (partition by common.mid order by ts)) session_id,

        dt

    from hudi_ods.ods_log

    where page is not null

)log

left join

(

    select

        id province_id,

        area_code

    from hudi_ods.ods_base_province

)bp

on log.area_code=bp.area_code;
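The session_id expression is worth unpacking: a page view whose last_page_id is NULL marks the start of a session, and last_value(...) over (partition by common.mid order by ts) carries that starting ts forward to every subsequent page view of the same device; this relies on LAST_VALUE skipping the NULLs produced for non-starting pages, which is how Flink's built-in aggregate behaves. A hypothetical trace for one device:

-- mid   ts     last_page_id   session_id
-- m1    1000   NULL           m1-1000   (session start)
-- m1    1005   home           m1-1000   (inherits the session-start ts)
-- m1    2000   NULL           m1-2000   (a new session begins)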

11.8 User Domain User Registration Transaction Fact Table dwd_user_register

11.8.1 Table Creation Statement

CREATE TABLE hudi_dwd.dwd_user_register

(

    `user_id`        BIGINT,

    `date_id`        STRING,

    `create_time`    TIMESTAMP(0),

    `channel`        STRING,

    `province_id`    BIGINT,

    `version_code`   STRING,

    `mid_id`         STRING,

    `brand`          STRING,

    `model`          STRING,

    `operate_system` STRING,

    `ts`             TIMESTAMP(3),

    `dt`             STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dwd/dwd_user_register',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'user_id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dwd',

    'hive_sync.table'='dwd_user_register'

);

11.8.2 Data Loading

insert into hudi_dwd.dwd_user_register

select

    ui.user_id,

    date_format(ui.create_time,'yyyy-MM-dd') date_id,

    ui.create_time,

    log.channel,

    bp.province_id,

    log.version_code,

    log.mid_id,

    log.brand,

    log.model,

    log.operate_system,

    ui.ts,

    date_format(ui.create_time,'yyyy-MM-dd') dt

from

(

    select

        id user_id,

        create_time,
        ts

    from hudi_ods.ods_user_info

)ui

left join

(

    select

        common.ar area_code,

        common.ba brand,

        common.ch channel,

        common.md model,

        common.mid mid_id,

        common.os operate_system,

        common.uid user_id,

        common.vc version_code,

        ts

    from hudi_ods.ods_log

    where page.page_id='register'

    and common.uid is not null

)log

on cast(ui.user_id as string)=log.user_id

left join

(

    select

        id province_id,

        area_code

    from hudi_ods.ods_base_province

)bp

on log.area_code=bp.area_code;

11.9 User Domain User Login Transaction Fact Table dwd_user_login

11.9.1 Table Creation Statement

CREATE TABLE hudi_dwd.dwd_user_login

(

    `user_id`        STRING,

    `date_id`        STRING,

    `login_time`     STRING,

    `channel`        STRING,

    `province_id`    BIGINT,

    `version_code`   STRING,

    `mid_id`         STRING,

    `brand`          STRING,

    `model`          STRING,

    `operate_system` STRING,

    `ts`             BIGINT,

    `dt`             STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dwd/dwd_user_login',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'user_id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dwd',

    'hive_sync.table'='dwd_user_login'

);

11.9.2 Data Loading

insert into hudi_dwd.dwd_user_login

select

    user_id,

    date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,

    date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') login_time,

    channel,

    province_id,

    version_code,

    mid_id,

    brand,

    model,

    operate_system,

    ts,

    dt

from

(

    select

        user_id,

        channel,

        area_code,

        version_code,

        mid_id,

        brand,

        model,

        operate_system,

        ts,

        dt

    from

    (

        select

            user_id,

            channel,

            area_code,

            version_code,

            mid_id,

            brand,

            model,

            operate_system,

            ts,

            dt,

            row_number() over (partition by session_id order by ts) rn

        from

        (

            select

                common.uid user_id,

                common.ch channel,

                common.ar area_code,

                common.vc version_code,

                common.mid mid_id,

                common.ba brand,

                common.md model,

                common.os operate_system,

                ts,

                concat(common.mid,'-',last_value(if(page.last_page_id is null,cast(ts as string),cast(null as string))) over (partition by common.mid order by ts)) session_id,

                dt

            from hudi_ods.ods_log

            where page is not null

        )t1

        where user_id is not null

    )t2

    where rn=1

)t3

left join

(

    select

        id province_id,

        area_code

    from hudi_ods.ods_base_province

)bp

on t3.area_code=bp.area_code;
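The deduplication step deserves a note: row_number() is partitioned by the session_id derived exactly as in 11.7, and rn=1 keeps only the earliest page view of each session that carries a non-null user_id, so every session contributes at most one login record. In the hypothetical trace from 11.7, the m1-1000 session with page views at ts 1000 and 1005 would yield a single login row, the one at ts=1000.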
