第9章 湖仓一体之ODS层
ODS层的设计要点如下:
1)ODS层的表结构设计依托于从业务系统同步过来的数据结构。
2)先使用Flink SQL建表,然后将数据源(也就是8.4中的数据)进行insert操作,并补齐缺失的字段。
9.1日志表建表及数据装载
1)建表语句
-- ODS log table: raw app event log ingested from Kafka into Hudi.
-- Nested ROW/ARRAY columns mirror the JSON structure of the log events.
-- Record key is a generated uuid (log events have no natural key); partitioned by dt.
CREATE TABLE `hudi_ods`.`ods_log` (
`uuid` STRING,
`common` ROW<ar STRING,ba STRING,ch STRING,is_new STRING,md STRING,mid STRING,os STRING,uid STRING,vc STRING>,
`page` ROW<during_time STRING,item STRING,item_type STRING,last_page_id STRING,page_id STRING,source_type STRING> ,
`actions` ARRAY<ROW<action_id STRING,item STRING,item_type STRING,ts BIGINT>>,
`displays` ARRAY<ROW<display_type STRING,item STRING,item_type STRING,`order` STRING,pos_id STRING>>,
`start` ROW<entry STRING,loading_time BIGINT,open_ad_id BIGINT,open_ad_ms BIGINT,open_ad_skip_ms BIGINT>,
`err` ROW<error_code BIGINT,msg STRING>,
`ts` BIGINT,
`dt` STRING
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_log',
-- MERGE_ON_READ: writes land in delta log files; async compaction (below)
-- merges them into base files every 5 commits.
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'uuid',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
-- Streaming read of new commits; compaction commits are skipped so rows
-- already consumed from delta files are not read again.
'read.streaming.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
-- Sync table metadata into the Hive Metastore so the table is also
-- queryable from Hive as hive_ods.ods_log.
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_log'
);
2)数据装载
-- Allow /*+ OPTIONS(...) */ table hints in queries.
set table.dynamic-table-options.enabled=true;
-- Drop (rather than fail on) rows that violate NOT NULL constraints on the sink.
set table.exec.sink.not-null-enforcer=drop;
-- Load the Kafka log stream into the Hudi ODS log table.
-- uuid() supplies the Hudi record key; dt is derived from the epoch-millis
-- event time ts so the sink is partitioned by event day.
-- NOTE(review): `select uuid() as uuid, *` relies on the Kafka source's column
-- order matching ods_log's columns between uuid and dt — verify against 8.4.
INSERT INTO hudi_ods.ods_log
select uuid() as uuid
,*
,FROM_UNIXTIME(ts/1000,'yyyy-MM-dd') as dt
from `kafka_log`.`kafka_topic_log`;
9.2业务表建表及数据装载
9.2.1 活动信息表ods_activity_info
-- ODS activity-info table on Hudi (MOR, partitioned by dt, Hive-synced).
-- Record key = id, precombine (dedup/latest-wins) field = ts.
CREATE TABLE `hudi_ods`.`ods_activity_info` (
`id` bigint,
`activity_name` varchar(200),
`activity_type` varchar(10),
`activity_desc` varchar(2000),
`start_time` timestamp(0),
`end_time` timestamp(0),
`create_time` timestamp(0),
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_activity_info',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
-- changelog mode keeps CDC -U/+U/-D rows so downstream streaming reads see updates.
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_activity_info');
9.2.2 活动规则表ods_activity_rule
-- ODS activity-rule table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_activity_rule` (
`id` int,
`activity_id` int,
`activity_type` varchar(20),
`condition_amount` decimal(16, 2),
`condition_num` bigint,
`benefit_amount` decimal(16, 2),
`benefit_discount` decimal(10, 2),
`benefit_level` bigint,
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_activity_rule',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_activity_rule');
9.2.3 一级品类表ods_base_category1
-- ODS level-1 category table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_base_category1` (
`id` bigint,
`name` varchar(10),
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_base_category1',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_base_category1');
9.2.4 二级品类表ods_base_category2
-- ODS level-2 category table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_base_category2` (
`id` bigint,
`name` varchar(200),
`category1_id` bigint,
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_base_category2',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_base_category2');
9.2.5 三级品类表ods_base_category3
-- ODS level-3 category table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_base_category3` (
`id` bigint,
`name` varchar(200),
`category2_id` bigint,
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_base_category3',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_base_category3');
9.2.6 编码字典表ods_base_dic
-- ODS dictionary-code table on Hudi (MOR, partitioned by dt, Hive-synced).
-- Note: record key here is dic_code (the natural key), not id.
CREATE TABLE `hudi_ods`.`ods_base_dic` (
`dic_code` varchar(10),
`dic_name` varchar(100),
`parent_code` varchar(10),
`create_time` timestamp(0),
`operate_time` timestamp(0),
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_base_dic',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'dic_code',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_base_dic');
9.2.7 省份表ods_base_province
-- ODS province table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_base_province` (
`id` bigint,
`name` varchar(20),
`region_id` varchar(20),
`area_code` varchar(20),
`iso_code` varchar(20),
`iso_3166_2` varchar(20),
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_base_province',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_base_province');
9.2.8 地区表ods_base_region
-- ODS region table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_base_region` (
`id` varchar(20),
`region_name` varchar(20),
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_base_region',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_base_region');
9.2.9 品牌表ods_base_trademark
-- ODS trademark table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_base_trademark` (
`id` bigint,
`tm_name` varchar(100),
`logo_url` varchar(200),
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_base_trademark',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_base_trademark');
9.2.10 购物车表ods_cart_info
-- ODS shopping-cart table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_cart_info` (
`id` bigint,
`user_id` varchar(200),
`sku_id` bigint,
`cart_price` decimal(10, 2),
`sku_num` int,
`img_url` varchar(200),
`sku_name` varchar(200),
`is_checked` int,
`create_time` timestamp(0),
`operate_time` timestamp(0),
`is_ordered` bigint,
`order_time` timestamp(0),
`source_type` varchar(20),
`source_id` bigint,
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_cart_info',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_cart_info');
9.2.11 优惠券信息表ods_coupon_info
-- ODS coupon-info table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_coupon_info` (
`id` bigint,
`coupon_name` varchar(100),
`coupon_type` varchar(10),
`condition_amount` decimal(10, 2),
`condition_num` bigint,
`activity_id` bigint,
`benefit_amount` decimal(16, 2),
`benefit_discount` decimal(10, 2),
`create_time` timestamp(0),
`range_type` varchar(10),
`limit_num` int,
`taken_count` int,
`start_time` timestamp(0),
`end_time` timestamp(0),
`operate_time` timestamp(0),
`expire_time` timestamp(0),
`range_desc` varchar(500),
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_coupon_info',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_coupon_info');
9.2.12 商品平台属性表ods_sku_attr_value
-- ODS SKU platform-attribute table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_sku_attr_value` (
`id` bigint,
`attr_id` bigint,
`value_id` bigint,
`sku_id` bigint,
`attr_name` varchar(30),
`value_name` varchar(30),
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_sku_attr_value',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_sku_attr_value');
9.2.13 商品表ods_sku_info
-- ODS SKU table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_sku_info` (
`id` bigint,
`spu_id` bigint,
`price` decimal(10, 0),
`sku_name` varchar(200),
`sku_desc` varchar(2000),
`weight` decimal(10, 2),
`tm_id` bigint,
`category3_id` bigint,
`sku_default_img` varchar(300),
`is_sale` boolean,
`create_time` timestamp(0),
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_sku_info',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_sku_info');
9.2.14 商品销售属性值表ods_sku_sale_attr_value
-- ODS SKU sale-attribute-value table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_sku_sale_attr_value` (
`id` bigint,
`sku_id` bigint,
`spu_id` int,
`sale_attr_value_id` bigint,
`sale_attr_id` bigint,
`sale_attr_name` varchar(30),
`sale_attr_value_name` varchar(30),
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_sku_sale_attr_value',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_sku_sale_attr_value');
9.2.15 SPU表ods_spu_info
-- ODS SPU table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_spu_info` (
`id` bigint,
`spu_name` varchar(200),
`description` varchar(1000),
`category3_id` bigint,
`tm_id` bigint,
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_spu_info',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_spu_info');
9.2.16 购物车表ods_cart_info
-- NOTE(review): this statement is byte-identical to section 9.2.10 and
-- re-creates `hudi_ods`.`ods_cart_info`. Running both will fail with
-- "table already exists" — one of the two occurrences appears to be a
-- copy-paste duplicate; confirm which table section 9.2.16 was meant to define.
CREATE TABLE `hudi_ods`.`ods_cart_info` (
`id` bigint,
`user_id` varchar(200),
`sku_id` bigint,
`cart_price` decimal(10, 2),
`sku_num` int,
`img_url` varchar(200),
`sku_name` varchar(200),
`is_checked` int,
`create_time` timestamp(0),
`operate_time` timestamp(0),
`is_ordered` bigint,
`order_time` timestamp(0),
`source_type` varchar(20),
`source_id` bigint,
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_cart_info',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_cart_info');
9.2.17 评论表ods_comment_info
-- ODS product-comment table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_comment_info` (
`id` bigint,
`user_id` bigint,
`nick_name` varchar(20),
`head_img` varchar(200),
`sku_id` bigint,
`spu_id` bigint,
`order_id` bigint,
`appraise` varchar(10),
`comment_txt` varchar(2000),
`create_time` timestamp(0),
`operate_time` timestamp(0),
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_comment_info',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_comment_info');
9.2.18 优惠券领用表ods_coupon_use
-- ODS coupon-usage table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_coupon_use` (
`id` bigint,
`coupon_id` bigint,
`user_id` bigint,
`order_id` bigint,
`coupon_status` varchar(10),
`create_time` timestamp(0),
`get_time` timestamp(0),
`using_time` timestamp(0),
`used_time` timestamp(0),
`expire_time` timestamp(0),
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_coupon_use',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_coupon_use');
9.2.19 收藏表ods_favor_info
-- ODS favorites table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
-- FIX(review): section 9.2.19 is titled 收藏表 ods_favor_info, but the original
-- DDL here created `ods_order_detail`, duplicating section 9.2.20 (the second
-- CREATE TABLE would fail with "table already exists"). Rewritten to define the
-- favor table matching the heading; columns follow the source favor_info
-- business table (id/user_id/sku_id/spu_id/is_cancel/create_time/cancel_time)
-- plus the standard ODS ts/dt audit columns — verify against the source schema.
CREATE TABLE `hudi_ods`.`ods_favor_info` (
`id` bigint,
`user_id` bigint,
`sku_id` bigint,
`spu_id` bigint,
`is_cancel` varchar(1),
`create_time` timestamp(0),
`cancel_time` timestamp(0),
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_favor_info',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_favor_info');
9.2.20 订单明细表ods_order_detail
-- ODS order-detail table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_order_detail` (
`id` bigint,
`order_id` bigint,
`sku_id` bigint,
`sku_name` varchar(200),
`img_url` varchar(200),
`order_price` decimal(10, 2),
`sku_num` bigint,
`create_time` timestamp(0),
`source_type` varchar(20),
`source_id` bigint,
`split_total_amount` decimal(16, 2),
`split_activity_amount` decimal(16, 2),
`split_coupon_amount` decimal(16, 2),
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_order_detail',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_order_detail');
9.2.21 订单明细活动关联表ods_order_detail_activity
-- ODS order-detail/activity link table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_order_detail_activity` (
`id` bigint,
`order_id` bigint,
`order_detail_id` bigint,
`activity_id` bigint,
`activity_rule_id` bigint,
`sku_id` bigint,
`create_time` timestamp(0),
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_order_detail_activity',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_order_detail_activity');
9.2.22 订单明细优惠券关联表ods_order_detail_coupon
-- ODS order-detail/coupon link table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_order_detail_coupon` (
`id` bigint,
`order_id` bigint,
`order_detail_id` bigint,
`coupon_id` bigint,
`coupon_use_id` bigint,
`sku_id` bigint,
`create_time` timestamp(0),
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_order_detail_coupon',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_order_detail_coupon');
9.2.23 订单表ods_order_info
-- ODS order header table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_order_info` (
`id` bigint,
`consignee` varchar(100),
`consignee_tel` varchar(20),
`total_amount` decimal(10, 2),
`order_status` varchar(20),
`user_id` bigint,
`payment_way` varchar(20),
`delivery_address` varchar(1000),
`order_comment` varchar(200),
`out_trade_no` varchar(50),
`trade_body` varchar(200),
`create_time` timestamp(0),
`operate_time` timestamp(0),
`expire_time` timestamp(0),
`process_status` varchar(20),
`tracking_no` varchar(100),
`parent_order_id` bigint,
`img_url` varchar(200),
`province_id` int,
`activity_reduce_amount` decimal(16, 2),
`coupon_reduce_amount` decimal(16, 2),
`original_total_amount` decimal(16, 2),
`feight_fee` decimal(16, 2),
`feight_fee_reduce` decimal(16, 2),
`refundable_time` timestamp(0),
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_order_info',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_order_info');
9.2.24 退单表ods_order_refund_info
-- ODS order-refund table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_order_refund_info` (
`id` bigint,
`user_id` bigint,
`order_id` bigint,
`sku_id` bigint,
`refund_type` varchar(20),
`refund_num` bigint,
`refund_amount` decimal(16, 2),
`refund_reason_type` varchar(200),
`refund_reason_txt` varchar(20),
`refund_status` varchar(10),
`create_time` timestamp(0),
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_order_refund_info',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_order_refund_info');
9.2.25 订单状态流水表ods_order_status_log
-- ODS order-status history table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_order_status_log` (
`id` bigint,
`order_id` bigint,
`order_status` varchar(11),
`operate_time` timestamp(0),
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_order_status_log',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_order_status_log');
9.2.26 支付表ods_payment_info
-- ODS payment table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_payment_info` (
`id` int,
`out_trade_no` varchar(50),
`order_id` bigint,
`user_id` bigint,
`payment_type` varchar(20),
`trade_no` varchar(50),
`total_amount` decimal(10, 2),
`subject` varchar(200),
`payment_status` varchar(20),
`create_time` timestamp(0),
`callback_time` timestamp(0),
`callback_content` string,
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_payment_info',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_payment_info');
9.2.27 退款表ods_refund_payment
-- ODS refund-payment table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_refund_payment` (
`id` int,
`out_trade_no` varchar(50),
`order_id` bigint,
`sku_id` bigint,
`payment_type` varchar(20),
`trade_no` varchar(50),
`total_amount` decimal(10, 2),
`subject` varchar(200),
`refund_status` varchar(30),
`create_time` timestamp(0),
`callback_time` timestamp(0),
`callback_content` string,
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_refund_payment',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_refund_payment');
9.2.28 用户表ods_user_info
-- ODS user table on Hudi (MOR, partitioned by dt, Hive-synced; key = id, precombine = ts).
CREATE TABLE `hudi_ods`.`ods_user_info` (
`id` bigint,
`login_name` varchar(200),
`nick_name` varchar(200),
`passwd` varchar(200),
`name` varchar(200),
`phone_num` varchar(200),
`email` varchar(200),
`head_img` varchar(200),
`user_level` varchar(200),
`birthday` date,
`gender` varchar(1),
`create_time` timestamp(0),
`operate_time` timestamp(0),
`status` varchar(200),
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ods/ods_user_info',
'table.type'='MERGE_ON_READ',
'hoodie.datasource.write.recordkey.field' = 'id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_ods',
'hive_sync.table'='ods_user_info');
9.2.29 数据装载
(1)我们将Flink CDC拿到的数据,分别对应hudi_ods库中的每张表进行插入操作。因为对每张表设置了hive_sync,所以每张表在hive的hive_ods数据库中同样有一份数据映射。
(2)在Flink SQL中执行插入语句,每个语句都会启动一个Flink SQL程序,数据将会实时地进入ODS层中。
-- Each INSERT below starts a streaming Flink job that continuously loads the
-- corresponding Flink CDC source table into its Hudi ODS table, appending an
-- ingestion timestamp (ts) and a load-date partition column (dt).
-- NOTE(review): `select *` makes each INSERT depend on the CDC source columns
-- exactly matching the ODS table's columns (before ts/dt) in order; an explicit
-- column list would be safer against schema drift.
-- load ods_activity_info
INSERT INTO hudi_ods.ods_activity_info
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.activity_info_cdc;
-- select * from hudi_ods.ods_activity_info;
-- load ods_activity_rule
INSERT INTO hudi_ods.ods_activity_rule
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.activity_rule_cdc;
-- select * from hudi_ods.ods_activity_rule;
-- load ods_base_category1
INSERT INTO hudi_ods.ods_base_category1
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.base_category1_cdc;
-- select * from hudi_ods.ods_base_category1;
-- load ods_base_category2
INSERT INTO hudi_ods.ods_base_category2
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.base_category2_cdc;
-- select * from hudi_ods.ods_base_category2;
-- load ods_base_category3
INSERT INTO hudi_ods.ods_base_category3
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.base_category3_cdc;
-- select * from hudi_ods.ods_base_category3;
-- load ods_base_dic
INSERT INTO hudi_ods.ods_base_dic
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.base_dic_cdc;
-- select * from hudi_ods.ods_base_dic;
-- load ods_base_province
INSERT INTO hudi_ods.ods_base_province
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.base_province_cdc;
-- select * from hudi_ods.ods_base_province;
-- load ods_base_region
INSERT INTO hudi_ods.ods_base_region
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.base_region_cdc;
-- select * from hudi_ods.ods_base_region;
-- load ods_base_trademark
INSERT INTO hudi_ods.ods_base_trademark
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.base_trademark_cdc;
-- select * from hudi_ods.ods_base_trademark;
-- load ods_cart_info
INSERT INTO hudi_ods.ods_cart_info
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.cart_info_cdc;
-- select * from hudi_ods.ods_cart_info;
-- ods_comment_info 插入数据
INSERT INTO hudi_ods.ods_comment_info
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.comment_info_cdc;
-- select * from hudi_ods.ods_comment_info;
-- ods_coupon_info 插入数据
INSERT INTO hudi_ods.ods_coupon_info
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.coupon_info_cdc;
-- select * from hudi_ods.ods_coupon_info;
-- ods_coupon_use 插入数据
INSERT INTO hudi_ods.ods_coupon_use
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.coupon_use_cdc;
-- select * from hudi_ods.ods_coupon_use;
-- ods_favor_info 插入数据
INSERT INTO hudi_ods.ods_favor_info
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.favor_info_cdc;
-- select * from hudi_ods.ods_favor_info;
-- ods_order_detail 插入数据
INSERT INTO hudi_ods.ods_order_detail
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.order_detail_cdc;
-- select * from hudi_ods.ods_order_detail;
-- ods_order_detail_activity 插入数据
INSERT INTO hudi_ods.ods_order_detail_activity
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.order_detail_activity_cdc;
-- select * from hudi_ods.ods_order_detail_activity;
-- ods_order_detail_coupon 插入数据
INSERT INTO hudi_ods.ods_order_detail_coupon
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.order_detail_coupon_cdc;
-- select * from hudi_ods.ods_order_detail_coupon;
-- ods_order_info 插入数据
INSERT INTO hudi_ods.ods_order_info
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.order_info_cdc;
-- select * from hudi_ods.ods_order_info;
-- ods_order_refund_info 插入数据
INSERT INTO hudi_ods.ods_order_refund_info
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.order_refund_info_cdc;
-- select * from hudi_ods.ods_order_refund_info;
-- ods_order_status_log 插入数据
INSERT INTO hudi_ods.ods_order_status_log
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.order_status_log_cdc;
-- select * from hudi_ods.ods_order_status_log;
-- ods_payment_info 插入数据
INSERT INTO hudi_ods.ods_payment_info
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.payment_info_cdc;
-- select * from hudi_ods.ods_payment_info;
-- ods_refund_payment 插入数据
INSERT INTO hudi_ods.ods_refund_payment
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.refund_payment_cdc;
-- select * from hudi_ods.ods_refund_payment;
-- ods_sku_attr_value 插入数据
INSERT INTO hudi_ods.ods_sku_attr_value
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.sku_attr_value_cdc;
-- select * from hudi_ods.ods_sku_attr_value;
-- Load ods_sku_info from the CDC source
INSERT INTO hudi_ods.ods_sku_info
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
-- FIX: source was flink_cdc.sku_attr_value_cdc (copy-paste from the previous
-- statement); ods_sku_info must be fed from the sku_info CDC topic.
from flink_cdc.sku_info_cdc;
-- select * from hudi_ods.ods_sku_info;
-- Load ods_sku_sale_attr_value from the CDC source
INSERT INTO hudi_ods.ods_sku_sale_attr_value
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.sku_sale_attr_value_cdc;
-- select * from hudi_ods.ods_sku_sale_attr_value;
-- Load ods_spu_info from the CDC source
INSERT INTO hudi_ods.ods_spu_info
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.spu_info_cdc;
-- select * from hudi_ods.ods_spu_info;
-- Load ods_user_info from the CDC source
INSERT INTO hudi_ods.ods_user_info
select *
,LOCALTIMESTAMP as ts
,cast(CURRENT_DATE as string) as dt
from flink_cdc.user_info_cdc;
-- select * from hudi_ods.ods_user_info;
第10章 湖仓一体之DIM层
DIM层设计要点:
1)DIM层的设计依据是维度建模理论,该层存储维度模型的维度表。
2)日期维度采用文件导入
10.1 商品维度表dim_sku
10.1.1建表语句
-- SKU dimension: one row per SKU, denormalized with SPU / category /
-- trademark names and attribute multisets. Hudi MERGE_ON_READ table
-- partitioned by dt, upserted on `id`, synced to hive_dim.dim_sku.
CREATE TABLE hudi_dim.dim_sku
(
    `id` BIGINT,
    `price` DECIMAL(10, 0),
    `sku_name` VARCHAR(200),
    `sku_desc` VARCHAR(2000),
    `weight` DECIMAL(10, 2),
    `is_sale` BOOLEAN,
    `spu_id` BIGINT,
    `spu_name` VARCHAR(200),
    `category3_id` BIGINT,
    `category3_name` VARCHAR(200),
    `category2_id` BIGINT,
    `category2_name` VARCHAR(200),
    `category1_id` BIGINT,
    `category1_name` VARCHAR(10),
    `tm_id` BIGINT,
    `tm_name` VARCHAR(100),
    `sku_attr_values` MULTISET<STRING>,
    `sku_sale_attr_values` MULTISET<STRING>,
    `create_time` TIMESTAMP(0),
    `ts` TIMESTAMP(3),
    `dt` STRING
)
PARTITIONED BY (`dt`)
WITH (
    'connector' = 'hudi',
    'path' = 'hdfs://hadoop102:8020/user/hudi/warehouse/dim/dim_sku',
    'table.type' = 'MERGE_ON_READ',
    'hoodie.datasource.write.recordkey.field' = 'id',
    'hoodie.datasource.write.precombine.field' = 'ts',
    'write.bucket_assign.tasks' = '1',
    'write.tasks' = '4',
    'compaction.tasks' = '1',
    'compaction.async.enabled' = 'true',
    'compaction.schedule.enabled' = 'true',
    'compaction.trigger.strategy' = 'num_commits',
    'compaction.delta_commits' = '5',
    'read.streaming.enabled' = 'true',
    'changelog.enabled' = 'true',
    'read.streaming.skip_compaction' = 'true',
    'hive_sync.enable' = 'true',
    'hive_sync.mode' = 'hms',
    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
    'hive_sync.db' = 'hive_dim',
    'hive_sync.table' = 'dim_sku'
);
10.1.2数据装载
-- dim_sku load: denormalize SKU rows with SPU / category / trademark names
-- and per-SKU multisets of '|'-joined attribute tuples.
insert into hudi_dim.dim_sku
select
sku.id,
sku.price,
sku.sku_name,
sku.sku_desc,
sku.weight,
sku.is_sale,
sku.spu_id,
spu.spu_name,
sku.category3_id,
c3.name,
c3.category2_id,
c2.name,
c2.category1_id,
c1.name,
sku.tm_id,
tm.tm_name,
attr.attrs,
sale_attr.sale_attrs,
sku.create_time,
LOCALTIMESTAMP as ts,
cast(CURRENT_DATE as string) as dt
from hudi_ods.ods_sku_info sku
left join hudi_ods.ods_spu_info spu on sku.spu_id=spu.id
left join hudi_ods.ods_base_category3 c3 on sku.category3_id=c3.id
left join hudi_ods.ods_base_category2 c2 on c3.category2_id=c2.id
left join hudi_ods.ods_base_category1 c1 on c2.category1_id=c1.id
left join hudi_ods.ods_base_trademark tm on sku.tm_id=tm.id
left join
-- per-SKU multiset of basic attribute tuples: attr_id|value_id|attr_name|value_name
(
select
sku_id,
collect(concat_ws('|',cast(attr_id as string),cast(value_id as string),attr_name,value_name)) attrs
from hudi_ods.ods_sku_attr_value
group by sku_id
) attr on sku.id=attr.sku_id
left join
-- per-SKU multiset of sale attribute tuples, same '|' layout
(
select
sku_id,
collect(concat_ws('|',cast(sale_attr_id as string),cast(sale_attr_value_id as string),sale_attr_name,sale_attr_value_name)) sale_attrs
from hudi_ods.ods_sku_sale_attr_value
group by sku_id
) sale_attr on sku.id=sale_attr.sku_id;
10.2 优惠券维度表dim_coupon
10.2.1建表语句
-- Coupon dimension: one row per coupon, with type/range names resolved from
-- the dictionary and a human-readable benefit_rule. Hudi MERGE_ON_READ table
-- partitioned by dt, upserted on `id`, synced to hive_dim.dim_coupon.
CREATE TABLE hudi_dim.dim_coupon
(
    `id` BIGINT,
    `coupon_name` VARCHAR(100),
    `coupon_type_code` VARCHAR(10),
    `coupon_type_name` VARCHAR(100),
    `condition_amount` DECIMAL(10, 2),
    `condition_num` BIGINT,
    `activity_id` BIGINT,
    `benefit_amount` DECIMAL(16, 2),
    `benefit_discount` DECIMAL(10, 2),
    `benefit_rule` STRING,
    `create_time` TIMESTAMP(0),
    `range_type_code` VARCHAR(10),
    `range_type_name` VARCHAR(100),
    `limit_num` INT,
    `taken_count` INT,
    `start_time` TIMESTAMP(0),
    `end_time` TIMESTAMP(0),
    `operate_time` TIMESTAMP(0),
    `expire_time` TIMESTAMP(0),
    `ts` TIMESTAMP(3),
    `dt` STRING
)
PARTITIONED BY (`dt`)
WITH (
    'connector' = 'hudi',
    'path' = 'hdfs://hadoop102:8020/user/hudi/warehouse/dim/dim_coupon',
    'table.type' = 'MERGE_ON_READ',
    'hoodie.datasource.write.recordkey.field' = 'id',
    'hoodie.datasource.write.precombine.field' = 'ts',
    'write.bucket_assign.tasks' = '1',
    'write.tasks' = '4',
    'compaction.tasks' = '1',
    'compaction.async.enabled' = 'true',
    'compaction.schedule.enabled' = 'true',
    'compaction.trigger.strategy' = 'num_commits',
    'compaction.delta_commits' = '5',
    'read.streaming.enabled' = 'true',
    'changelog.enabled' = 'true',
    'read.streaming.skip_compaction' = 'true',
    'hive_sync.enable' = 'true',
    'hive_sync.mode' = 'hms',
    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
    'hive_sync.db' = 'hive_dim',
    'hive_sync.table' = 'dim_coupon'
);
10.2.2数据装载
-- dim_coupon load: resolve coupon type / range names from the dictionary
-- (parent codes '32' and '33') and render a readable benefit_rule.
insert into hudi_dim.dim_coupon
select
id,
coupon_name,
coupon_type,
coupon_dic.dic_name,
condition_amount,
condition_num,
activity_id,
benefit_amount,
benefit_discount,
-- FIX: CONCAT requires string arguments; the DECIMAL/BIGINT operands are now
-- cast explicitly, matching the dim_activity load which already casts.
case coupon_type
when '3201' then concat('满',cast(condition_amount as string),'元减',cast(benefit_amount as string),'元')
when '3202' then concat('满',cast(condition_num as string),'件打',cast(10*(1-benefit_discount) as string),'折')
when '3203' then concat('减',cast(benefit_amount as string),'元')
end benefit_rule,
create_time,
range_type,
range_dic.dic_name,
limit_num,
taken_count,
start_time,
end_time,
operate_time,
expire_time,
LOCALTIMESTAMP as ts,
cast(CURRENT_DATE as string) as dt
from
(
select
id,
coupon_name,
coupon_type,
condition_amount,
condition_num,
activity_id,
benefit_amount,
benefit_discount,
create_time,
range_type,
limit_num,
taken_count,
start_time,
end_time,
operate_time,
expire_time
from hudi_ods.ods_coupon_info
)ci
left join
-- coupon type names (dictionary group 32)
(
select
dic_code,
dic_name
from hudi_ods.ods_base_dic
where parent_code='32'
)coupon_dic
on ci.coupon_type=coupon_dic.dic_code
left join
-- coupon range type names (dictionary group 33)
(
select
dic_code,
dic_name
from hudi_ods.ods_base_dic
where parent_code='33'
)range_dic
on ci.range_type=range_dic.dic_code;
10.3 活动维度表dim_activity
10.3.1建表语句
-- Activity dimension: the grain is one row per activity RULE (the load joins
-- activity_rule to activity_info). Hudi MERGE_ON_READ table partitioned by
-- dt, synced to hive_dim.dim_activity.
CREATE TABLE hudi_dim.dim_activity
(
`activity_rule_id` int,
`activity_id` bigint,
`activity_name` varchar(200),
`activity_type_code` varchar(20),
`activity_type_name` varchar(100),
`activity_desc` varchar(2000),
`start_time` timestamp(0),
`end_time` timestamp(0),
`create_time` timestamp(0),
`condition_amount` DECIMAL(16, 2),
`condition_num` BIGINT,
`benefit_amount` DECIMAL(16, 2),
`benefit_discount` DECIMAL(10, 2),
`benefit_rule` STRING,
`benefit_level` bigint,
`ts` timestamp(3),
`dt` string
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dim/dim_activity',
'table.type'='MERGE_ON_READ',
-- FIX: the table holds one row per activity rule; keying on activity_id made
-- Hudi upserts collapse all rules of an activity into a single row.
'hoodie.datasource.write.recordkey.field' = 'activity_rule_id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_dim',
'hive_sync.table'='dim_activity');
10.3.2数据装载
-- dim_activity load: one output row per activity rule, enriched with the
-- activity header and the activity type name (dictionary group 31).
insert into hudi_dim.dim_activity
select
rule.id,
info.id,
activity_name,
rule.activity_type,
dic.dic_name,
activity_desc,
start_time,
end_time,
create_time,
condition_amount,
condition_num,
benefit_amount,
benefit_discount,
case rule.activity_type
when '3101' then concat('满',cast(condition_amount as string),'元减',cast(benefit_amount as string),'元')
when '3102' then concat('满',cast(condition_amount as string),'件打',cast(10*(1-benefit_discount) as string),'折')
when '3103' then concat('打',cast(10*(1-benefit_discount) as string),'折')
end benefit_rule,
benefit_level,
LOCALTIMESTAMP as ts,
cast(CURRENT_DATE as string) as dt
from hudi_ods.ods_activity_rule rule
left join hudi_ods.ods_activity_info info
on rule.activity_id=info.id
left join
-- activity type names (dictionary group 31)
(
select
dic_code,
dic_name
from hudi_ods.ods_base_dic
where parent_code='31'
)dic
on rule.activity_type=dic.dic_code;
10.4 地区维度表dim_province
10.4.1建表语句
-- Province dimension with its region denormalized in. Hudi MERGE_ON_READ
-- table partitioned by dt, upserted on `id`, synced to hive_dim.dim_province.
CREATE TABLE hudi_dim.dim_province
(
    `id` BIGINT,
    `province_name` VARCHAR(20),
    `area_code` VARCHAR(20),
    `iso_code` VARCHAR(20),
    `iso_3166_2` VARCHAR(20),
    `region_id` VARCHAR(20),
    `region_name` VARCHAR(20),
    `ts` TIMESTAMP(3),
    `dt` STRING
)
PARTITIONED BY (`dt`)
WITH (
    'connector' = 'hudi',
    'path' = 'hdfs://hadoop102:8020/user/hudi/warehouse/dim/dim_province',
    'table.type' = 'MERGE_ON_READ',
    'hoodie.datasource.write.recordkey.field' = 'id',
    'hoodie.datasource.write.precombine.field' = 'ts',
    'write.bucket_assign.tasks' = '1',
    'write.tasks' = '4',
    'compaction.tasks' = '1',
    'compaction.async.enabled' = 'true',
    'compaction.schedule.enabled' = 'true',
    'compaction.trigger.strategy' = 'num_commits',
    'compaction.delta_commits' = '5',
    'read.streaming.enabled' = 'true',
    'changelog.enabled' = 'true',
    'read.streaming.skip_compaction' = 'true',
    'hive_sync.enable' = 'true',
    'hive_sync.mode' = 'hms',
    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
    'hive_sync.db' = 'hive_dim',
    'hive_sync.table' = 'dim_province'
);
10.4.2数据装载
-- dim_province load: attach the region name to each province.
insert into hudi_dim.dim_province
select
province.id,
province.name,
province.area_code,
province.iso_code,
province.iso_3166_2,
region_id,
region_name,
LOCALTIMESTAMP as ts,
cast(CURRENT_DATE as string) as dt
from hudi_ods.ods_base_province province
left join hudi_ods.ods_base_region region
on province.region_id=region.id;
10.5 日期维度表dim_date
10.5.1建表语句
-- Calendar dimension, loaded once per year from a CSV file (see 10.5.2).
-- Unpartitioned Hudi MERGE_ON_READ table keyed on date_id; single write task
-- and compaction after every delta commit since the volume is tiny.
CREATE TABLE hudi_dim.dim_date
(
    `date_id` STRING,
    `week_id` STRING,
    `week_day` STRING,
    `day` STRING,
    `month` STRING,
    `quarter` STRING,
    `year` STRING,
    `is_workday` STRING,
    `holiday_id` STRING,
    `ts` TIMESTAMP(3)
) WITH (
    'connector' = 'hudi',
    'path' = 'hdfs://hadoop102:8020/user/hudi/warehouse/dim/dim_date',
    'table.type' = 'MERGE_ON_READ',
    'hoodie.datasource.write.recordkey.field' = 'date_id',
    'hoodie.datasource.write.precombine.field' = 'ts',
    'write.bucket_assign.tasks' = '1',
    'write.tasks' = '1',
    'compaction.tasks' = '1',
    'compaction.async.enabled' = 'true',
    'compaction.schedule.enabled' = 'true',
    'compaction.trigger.strategy' = 'num_commits',
    'compaction.delta_commits' = '1',
    'read.streaming.enabled' = 'true',
    'changelog.enabled' = 'true',
    'read.streaming.skip_compaction' = 'true',
    'hive_sync.enable' = 'true',
    'hive_sync.mode' = 'hms',
    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
    'hive_sync.db' = 'hive_dim',
    'hive_sync.table' = 'dim_date'
);
10.5.2数据装载
通常情况下,时间维度表的数据并不是来自于业务系统,而是手动写入,并且由于时间维度表数据的可预见性,无须每日导入,一般可一次性导入一年的数据。
(1)将HDFS的文件映射为一张表
-- Filesystem source mapping the date-dimension CSV uploaded in step (2).
CREATE TABLE myflink.date_file_source (
`date_id` STRING,
`week_id` STRING,
`week_day` STRING,
`day` STRING,
`month` STRING,
`quarter` STRING,
`year` STRING,
`is_workday` STRING,
`holiday_id` STRING
) WITH (
'connector' = 'filesystem',
-- FIX: host was 'hadoop1', inconsistent with every other path in this
-- pipeline (hdfs://hadoop102:8020); the CSV is uploaded to that cluster.
'path' = 'hdfs://hadoop102:8020/tmp_data/date_info.csv',
'format' = 'csv'
);
(2)将数据文件上传到HDFS上临时表路径/tmp_data/date_info.csv
(3)执行以下语句将其导入时间维度表
-- One-off load of the calendar dimension from the CSV source; `select *`
-- relies on the source columns matching dim_date order, with ts appended.
insert into hudi_dim.dim_date
select
*,
LOCALTIMESTAMP as ts
from myflink.date_file_source;
10.6 用户维度表dim_user_info
10.6.1建表语句
-- User dimension (sensitive fields are hashed at load time; passwd/head_img/
-- status are intentionally dropped from the ODS schema). Hudi MERGE_ON_READ
-- table partitioned by dt, upserted on `id`, synced to hive_dim.
CREATE TABLE hudi_dim.dim_user_info
(
    `id` BIGINT,
    `login_name` VARCHAR(200),
    `nick_name` VARCHAR(200),
    `name` VARCHAR(200),
    `phone_num` VARCHAR(200),
    `email` VARCHAR(200),
    `user_level` VARCHAR(200),
    `birthday` DATE,
    `gender` VARCHAR(1),
    `create_time` TIMESTAMP(0),
    `operate_time` TIMESTAMP(0),
    `start_time` TIMESTAMP(0),
    `ts` TIMESTAMP(3),
    `dt` STRING
)
PARTITIONED BY (`dt`)
WITH (
    'connector' = 'hudi',
    'path' = 'hdfs://hadoop102:8020/user/hudi/warehouse/dim/dim_user_info',
    'table.type' = 'MERGE_ON_READ',
    'hoodie.datasource.write.recordkey.field' = 'id',
    'hoodie.datasource.write.precombine.field' = 'ts',
    'write.bucket_assign.tasks' = '1',
    'write.tasks' = '4',
    'compaction.tasks' = '1',
    'compaction.async.enabled' = 'true',
    'compaction.schedule.enabled' = 'true',
    'compaction.trigger.strategy' = 'num_commits',
    'compaction.delta_commits' = '5',
    'read.streaming.enabled' = 'true',
    'changelog.enabled' = 'true',
    'read.streaming.skip_compaction' = 'true',
    'hive_sync.enable' = 'true',
    'hive_sync.mode' = 'hms',
    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
    'hive_sync.db' = 'hive_dim',
    'hive_sync.table' = 'dim_user_info'
);
10.6.2数据装载
-- dim_user_info load: hash PII (name always; phone/email only when they match
-- the expected format, otherwise NULL before hashing); partition by the
-- user's creation date.
-- NOTE(review): the '\\\d' and '\\\.' escapes look over-escaped for a SQL
-- string literal (backslash is not an escape character in standard SQL
-- strings) — verify these patterns actually match against real data.
insert into hudi_dim.dim_user_info
select
id,
login_name,
nick_name,
md5(name) name,
md5(if(regexp(phone_num,'^(13[0-9]|14[01456879]|15[0-35-9]|16[2567]|17[0-8]|18[0-9]|19[0-35-9])\\\d{8}$'),phone_num,CAST(NULL AS STRING))) phone_num,
md5(if(regexp(email,'^[a-zA-Z0-9_-]+@[a-zA-Z0-9_-]+(\\\.[a-zA-Z0-9_-]+)+$'),email,CAST(NULL AS STRING))) email,
user_level,
birthday,
gender,
create_time,
operate_time,
-- last-change time: fall back to create_time when never updated
nvl(operate_time,create_time) as start_time,
LOCALTIMESTAMP as ts,
date_format(create_time,'yyyy-MM-dd') as dt
from hudi_ods.ods_user_info;
第11章 湖仓一体之DWD层
DWD层设计要点:
1)DWD层的设计依据是维度建模理论,该层存储维度模型的事实表。
2)DWD层的数据以Hudi MERGE_ON_READ表的形式存储(Parquet基础文件+Avro增量日志),与本章建表语句保持一致。
3)DWD层表名的命名规范为dwd_数据域_表名_单分区增量全量标识(inc/full)
11.1 交易域加购事务事实表dwd_trade_cart_add
11.1.1建表语句
-- Trade domain: cart-add transaction fact, one row per cart_info record.
-- Hudi MERGE_ON_READ table partitioned by dt, upserted on `id`.
CREATE TABLE hudi_dwd.dwd_trade_cart_add
(
    `id` BIGINT,
    `user_id` VARCHAR(200),
    `sku_id` BIGINT,
    `date_id` STRING,
    `create_time` TIMESTAMP(0),
    `source_id` BIGINT,
    `source_type_code` VARCHAR(20),
    `source_type_name` VARCHAR(100),
    `sku_num` INT,
    `ts` TIMESTAMP(3),
    `dt` STRING
)
PARTITIONED BY (`dt`)
WITH (
    'connector' = 'hudi',
    'path' = 'hdfs://hadoop102:8020/user/hudi/warehouse/dwd/dwd_trade_cart_add',
    'table.type' = 'MERGE_ON_READ',
    'hoodie.datasource.write.recordkey.field' = 'id',
    'hoodie.datasource.write.precombine.field' = 'ts',
    'write.bucket_assign.tasks' = '1',
    'write.tasks' = '4',
    'compaction.tasks' = '1',
    'compaction.async.enabled' = 'true',
    'compaction.schedule.enabled' = 'true',
    'compaction.trigger.strategy' = 'num_commits',
    'compaction.delta_commits' = '5',
    'read.streaming.enabled' = 'true',
    'changelog.enabled' = 'true',
    'read.streaming.skip_compaction' = 'true',
    'hive_sync.enable' = 'true',
    'hive_sync.mode' = 'hms',
    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
    'hive_sync.db' = 'hive_dwd',
    'hive_sync.table' = 'dwd_trade_cart_add'
);
11.1.2数据装载
-- Cart-add fact load: resolve source_type to its name (dictionary group 24).
insert into hudi_dwd.dwd_trade_cart_add
select
cart.id,
cart.user_id,
cart.sku_id,
cart.dt date_id,
cart.create_time,
cart.source_id,
cart.source_type,
dic.source_type_name,
cart.sku_num,
cart.ts,
cart.dt
from hudi_ods.ods_cart_info cart
left join
-- source type names (dictionary group 24)
(
select
dic_code,
dic_name source_type_name
from hudi_ods.ods_base_dic
where parent_code='24'
)dic
on cart.source_type=dic.dic_code;
11.2 交易域下单事务事实表dwd_trade_order_detail
11.2.1建表语句
-- Trade domain: order-placement transaction fact, one row per order line.
-- Hudi MERGE_ON_READ table partitioned by dt, upserted on `id`.
CREATE TABLE hudi_dwd.dwd_trade_order_detail
(
    `id` BIGINT,
    `order_id` BIGINT,
    `user_id` BIGINT,
    `sku_id` BIGINT,
    `province_id` INT,
    `activity_id` BIGINT,
    `activity_rule_id` BIGINT,
    `coupon_id` BIGINT,
    `date_id` STRING,
    `create_time` TIMESTAMP(0),
    `source_id` BIGINT,
    `source_type_code` VARCHAR(20),
    `source_type_name` VARCHAR(100),
    `sku_num` BIGINT,
    `split_original_amount` DECIMAL(16, 2),
    `split_activity_amount` DECIMAL(16, 2),
    `split_coupon_amount` DECIMAL(16, 2),
    `split_total_amount` DECIMAL(16, 2),
    `ts` TIMESTAMP(3),
    `dt` STRING
)
PARTITIONED BY (`dt`)
WITH (
    'connector' = 'hudi',
    'path' = 'hdfs://hadoop102:8020/user/hudi/warehouse/dwd/dwd_trade_order_detail',
    'table.type' = 'MERGE_ON_READ',
    'hoodie.datasource.write.recordkey.field' = 'id',
    'hoodie.datasource.write.precombine.field' = 'ts',
    'write.bucket_assign.tasks' = '1',
    'write.tasks' = '4',
    'compaction.tasks' = '1',
    'compaction.async.enabled' = 'true',
    'compaction.schedule.enabled' = 'true',
    'compaction.trigger.strategy' = 'num_commits',
    'compaction.delta_commits' = '5',
    'read.streaming.enabled' = 'true',
    'changelog.enabled' = 'true',
    'read.streaming.skip_compaction' = 'true',
    'hive_sync.enable' = 'true',
    'hive_sync.mode' = 'hms',
    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
    'hive_sync.db' = 'hive_dwd',
    'hive_sync.table' = 'dwd_trade_order_detail'
);
11.2.2数据装载
-- Order-detail fact load: enrich each order line with the order header
-- (user/province), activity and coupon attributions, and source type name.
insert into hudi_dwd.dwd_trade_order_detail
select
od.id,
od.order_id,
oi.user_id,
od.sku_id,
oi.province_id,
act.activity_id,
act.activity_rule_id,
cou.coupon_id,
od.dt date_id,
od.create_time,
od.source_id,
od.source_type,
dic.dic_name,
od.sku_num,
od.sku_num * od.order_price split_original_amount,
nvl(od.split_activity_amount,0.0) split_activity_amount,
nvl(od.split_coupon_amount,0.0) split_coupon_amount,
od.split_total_amount,
od.ts,
od.dt
from hudi_ods.ods_order_detail od
left join
-- order header: buyer and shipping province
(
select
id,
user_id,
province_id
from hudi_ods.ods_order_info
) oi
on od.order_id = oi.id
left join
-- activity attribution per order line
(
select
order_detail_id,
activity_id,
activity_rule_id
from hudi_ods.ods_order_detail_activity
) act
on od.id = act.order_detail_id
left join
-- coupon attribution per order line
(
select
order_detail_id,
coupon_id
from hudi_ods.ods_order_detail_coupon
) cou
on od.id = cou.order_detail_id
left join
-- source type names (dictionary group 24)
(
select
dic_code,
dic_name
from hudi_ods.ods_base_dic
where parent_code='24'
)dic
on od.source_type=dic.dic_code;
11.3 交易域支付成功事务事实表dwd_trade_pay_detail_suc
11.3.1建表语句
-- Trade domain: successful-payment transaction fact, one row per order line
-- whose order was paid. Hudi MERGE_ON_READ table partitioned by dt.
CREATE TABLE hudi_dwd.dwd_trade_pay_detail_suc
(
    `id` BIGINT,
    `order_id` BIGINT,
    `user_id` BIGINT,
    `sku_id` BIGINT,
    `province_id` INT,
    `activity_id` BIGINT,
    `activity_rule_id` BIGINT,
    `coupon_id` BIGINT,
    `payment_type_code` VARCHAR(20),
    `payment_type_name` VARCHAR(100),
    `date_id` STRING,
    `callback_time` TIMESTAMP(0),
    `source_id` BIGINT,
    `source_type_code` VARCHAR(20),
    `source_type_name` VARCHAR(100),
    `sku_num` BIGINT,
    `split_original_amount` DECIMAL(16, 2),
    `split_activity_amount` DECIMAL(16, 2),
    `split_coupon_amount` DECIMAL(16, 2),
    `split_payment_amount` DECIMAL(16, 2),
    `ts` TIMESTAMP(3),
    `dt` STRING
)
PARTITIONED BY (`dt`)
WITH (
    'connector' = 'hudi',
    'path' = 'hdfs://hadoop102:8020/user/hudi/warehouse/dwd/dwd_trade_pay_detail_suc',
    'table.type' = 'MERGE_ON_READ',
    'hoodie.datasource.write.recordkey.field' = 'id',
    'hoodie.datasource.write.precombine.field' = 'ts',
    'write.bucket_assign.tasks' = '1',
    'write.tasks' = '4',
    'compaction.tasks' = '1',
    'compaction.async.enabled' = 'true',
    'compaction.schedule.enabled' = 'true',
    'compaction.trigger.strategy' = 'num_commits',
    'compaction.delta_commits' = '5',
    'read.streaming.enabled' = 'true',
    'changelog.enabled' = 'true',
    'read.streaming.skip_compaction' = 'true',
    'hive_sync.enable' = 'true',
    'hive_sync.mode' = 'hms',
    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
    'hive_sync.db' = 'hive_dwd',
    'hive_sync.table' = 'dwd_trade_pay_detail_suc'
);
11.3.2数据装载
-- Paid-order-detail fact load: inner join to payments with status '1602'
-- (payment success) filters to paid orders; enrich with province,
-- activity/coupon attributions, payment type and source type names.
insert into hudi_dwd.dwd_trade_pay_detail_suc
select
od.id,
od.order_id,
pi.user_id,
od.sku_id,
oi.province_id,
act.activity_id,
act.activity_rule_id,
cou.coupon_id,
pi.payment_type,
pay_dic.dic_name,
date_format(pi.callback_time,'yyyy-MM-dd') date_id,
pi.callback_time,
od.source_id,
od.source_type,
src_dic.dic_name,
od.sku_num,
od.sku_num * od.order_price split_original_amount,
nvl(od.split_activity_amount,0.0) split_activity_amount,
nvl(od.split_coupon_amount,0.0) split_coupon_amount,
-- NOTE(review): split_payment_amount is approximated by the line's
-- split_total_amount — confirm this matches the intended business rule.
od.split_total_amount,
od.ts,
od.dt
from hudi_ods.ods_order_detail od
join
-- successful payments only ('1602' = payment success)
(
select
user_id,
order_id,
payment_type,
callback_time
from hudi_ods.ods_payment_info
where payment_status='1602'
) pi
on od.order_id=pi.order_id
left join
-- order header: shipping province
(
select
id,
province_id
from hudi_ods.ods_order_info
) oi
on od.order_id = oi.id
left join
-- activity attribution per order line
(
select
order_detail_id,
activity_id,
activity_rule_id
from hudi_ods.ods_order_detail_activity
) act
on od.id = act.order_detail_id
left join
-- coupon attribution per order line
(
select
order_detail_id,
coupon_id
from hudi_ods.ods_order_detail_coupon
) cou
on od.id = cou.order_detail_id
left join
-- payment type names (dictionary group 11)
(
select
dic_code,
dic_name
from hudi_ods.ods_base_dic
where parent_code='11'
) pay_dic
on pi.payment_type=pay_dic.dic_code
left join
-- source type names (dictionary group 24)
(
select
dic_code,
dic_name
from hudi_ods.ods_base_dic
where parent_code='24'
)src_dic
on od.source_type=src_dic.dic_code;
11.4 交易域交易流程累积快照事实表dwd_trade_trade_flow_acc
11.4.1建表语句
-- Trade domain: accumulating snapshot of the order lifecycle (placed ->
-- paid -> finished), one row per order. Hudi MERGE_ON_READ table partitioned
-- by dt, upserted on order_id so later milestones overwrite the row.
CREATE TABLE hudi_dwd.dwd_trade_trade_flow_acc
(
    `order_id` BIGINT,
    `user_id` BIGINT,
    `province_id` INT,
    `order_date_id` STRING,
    `order_time` TIMESTAMP(0),
    `payment_date_id` STRING,
    `payment_time` TIMESTAMP(0),
    `finish_date_id` STRING,
    `finish_time` TIMESTAMP(0),
    `order_original_amount` DECIMAL(16, 2),
    `order_activity_amount` DECIMAL(16, 2),
    `order_coupon_amount` DECIMAL(16, 2),
    `order_total_amount` DECIMAL(10, 2),
    `payment_amount` DECIMAL(10, 2),
    `ts` TIMESTAMP(3),
    `dt` STRING
)
PARTITIONED BY (`dt`)
WITH (
    'connector' = 'hudi',
    'path' = 'hdfs://hadoop102:8020/user/hudi/warehouse/dwd/dwd_trade_trade_flow_acc',
    'table.type' = 'MERGE_ON_READ',
    'hoodie.datasource.write.recordkey.field' = 'order_id',
    'hoodie.datasource.write.precombine.field' = 'ts',
    'write.bucket_assign.tasks' = '1',
    'write.tasks' = '4',
    'compaction.tasks' = '1',
    'compaction.async.enabled' = 'true',
    'compaction.schedule.enabled' = 'true',
    'compaction.trigger.strategy' = 'num_commits',
    'compaction.delta_commits' = '5',
    'read.streaming.enabled' = 'true',
    'changelog.enabled' = 'true',
    'read.streaming.skip_compaction' = 'true',
    'hive_sync.enable' = 'true',
    'hive_sync.mode' = 'hms',
    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
    'hive_sync.db' = 'hive_dwd',
    'hive_sync.table' = 'dwd_trade_trade_flow_acc'
);
11.4.2数据装载
-- Order lifecycle accumulating snapshot: payment and finish milestones are
-- left-joined so unpaid/unfinished orders carry NULL milestone columns.
-- NOTE(review): if an order has multiple '1004' status-log rows the left
-- join fans out — verify order_status_log is unique per (order_id, status).
insert into hudi_dwd.dwd_trade_trade_flow_acc
select
oi.id,
oi.user_id,
oi.province_id,
date_format(oi.create_time,'yyyy-MM-dd'),
oi.create_time,
date_format(pi.callback_time,'yyyy-MM-dd'),
pi.callback_time,
date_format(log.operate_time,'yyyy-MM-dd'),
log.operate_time,
oi.original_total_amount,
oi.activity_reduce_amount,
oi.coupon_reduce_amount,
oi.total_amount,
nvl(pi.payment_amount,0.0),
oi.ts,
oi.dt
from hudi_ods.ods_order_info oi
left join
-- payment milestone ('1602' = payment success)
(
select
order_id,
callback_time,
total_amount payment_amount
from hudi_ods.ods_payment_info
where payment_status='1602'
)pi
on oi.id=pi.order_id
left join
-- finish milestone ('1004' = order completed)
(
select
order_id,
operate_time
from hudi_ods.ods_order_status_log
where order_status='1004'
)log
on oi.id=log.order_id;
11.5 工具域优惠券使用(支付)事务事实表dwd_tool_coupon_used
11.5.1建表语句
-- Tool domain: coupon-used (paid) transaction fact, one row per coupon use.
-- Hudi MERGE_ON_READ table partitioned by dt, upserted on `id`.
CREATE TABLE hudi_dwd.dwd_tool_coupon_used
(
    `id` BIGINT,
    `coupon_id` BIGINT,
    `user_id` BIGINT,
    `order_id` BIGINT,
    `date_id` STRING,
    `payment_time` TIMESTAMP(0),
    `ts` TIMESTAMP(3),
    `dt` STRING
)
PARTITIONED BY (`dt`)
WITH (
    'connector' = 'hudi',
    'path' = 'hdfs://hadoop102:8020/user/hudi/warehouse/dwd/dwd_tool_coupon_used',
    'table.type' = 'MERGE_ON_READ',
    'hoodie.datasource.write.recordkey.field' = 'id',
    'hoodie.datasource.write.precombine.field' = 'ts',
    'write.bucket_assign.tasks' = '1',
    'write.tasks' = '4',
    'compaction.tasks' = '1',
    'compaction.async.enabled' = 'true',
    'compaction.schedule.enabled' = 'true',
    'compaction.trigger.strategy' = 'num_commits',
    'compaction.delta_commits' = '5',
    'read.streaming.enabled' = 'true',
    'changelog.enabled' = 'true',
    'read.streaming.skip_compaction' = 'true',
    'hive_sync.enable' = 'true',
    'hive_sync.mode' = 'hms',
    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
    'hive_sync.db' = 'hive_dwd',
    'hive_sync.table' = 'dwd_tool_coupon_used'
);
11.5.2数据装载
-- Coupon-used fact load: only coupon_use rows that reached payment
-- (used_time set) qualify.
insert into hudi_dwd.dwd_tool_coupon_used
select
id,
coupon_id,
user_id,
order_id,
date_format(used_time,'yyyy-MM-dd') date_id,
used_time,
ts,
dt
from hudi_ods.ods_coupon_use
where used_time is not null;
11.6 互动域收藏商品事务事实表dwd_interaction_favor_add
11.6.1建表语句
-- Interaction domain: favorite-added transaction fact, one row per favor
-- record. Hudi MERGE_ON_READ table partitioned by dt, upserted on `id`.
CREATE TABLE hudi_dwd.dwd_interaction_favor_add
(
    `id` BIGINT,
    `user_id` BIGINT,
    `sku_id` BIGINT,
    `date_id` STRING,
    `create_time` TIMESTAMP(0),
    `ts` TIMESTAMP(3),
    `dt` STRING
)
PARTITIONED BY (`dt`)
WITH (
    'connector' = 'hudi',
    'path' = 'hdfs://hadoop102:8020/user/hudi/warehouse/dwd/dwd_interaction_favor_add',
    'table.type' = 'MERGE_ON_READ',
    'hoodie.datasource.write.recordkey.field' = 'id',
    'hoodie.datasource.write.precombine.field' = 'ts',
    'write.bucket_assign.tasks' = '1',
    'write.tasks' = '4',
    'compaction.tasks' = '1',
    'compaction.async.enabled' = 'true',
    'compaction.schedule.enabled' = 'true',
    'compaction.trigger.strategy' = 'num_commits',
    'compaction.delta_commits' = '5',
    'read.streaming.enabled' = 'true',
    'changelog.enabled' = 'true',
    'read.streaming.skip_compaction' = 'true',
    'hive_sync.enable' = 'true',
    'hive_sync.mode' = 'hms',
    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
    'hive_sync.db' = 'hive_dwd',
    'hive_sync.table' = 'dwd_interaction_favor_add'
);
11.6.2数据装载
-- Favorite-added fact load: straight projection from ods_favor_info.
insert into hudi_dwd.dwd_interaction_favor_add
select
id,
user_id,
sku_id,
date_format(create_time,'yyyy-MM-dd') date_id,
create_time,
ts,
dt
from hudi_ods.ods_favor_info;
11.7 流量域页面浏览事务事实表dwd_traffic_page_view
11.7.1建表语句
-- Traffic domain: page-view transaction fact flattened from ods_log.
-- Hudi MERGE_ON_READ table partitioned by dt, upserted on the log uuid.
CREATE TABLE hudi_dwd.dwd_traffic_page_view
(
    `uuid` STRING,
    `province_id` BIGINT,
    `brand` STRING,
    `channel` STRING,
    `is_new` STRING,
    `model` STRING,
    `mid_id` STRING,
    `operate_system` STRING,
    `user_id` STRING,
    `version_code` STRING,
    `page_item` STRING,
    `page_item_type` STRING,
    `last_page_id` STRING,
    `page_id` STRING,
    `source_type` STRING,
    `date_id` STRING,
    `view_time` STRING,
    `session_id` STRING,
    `during_time` STRING,
    `ts` BIGINT,
    `dt` STRING
)
PARTITIONED BY (`dt`)
WITH (
    'connector' = 'hudi',
    'path' = 'hdfs://hadoop102:8020/user/hudi/warehouse/dwd/dwd_traffic_page_view',
    'table.type' = 'MERGE_ON_READ',
    'hoodie.datasource.write.recordkey.field' = 'uuid',
    'hoodie.datasource.write.precombine.field' = 'ts',
    'write.bucket_assign.tasks' = '1',
    'write.tasks' = '4',
    'compaction.tasks' = '1',
    'compaction.async.enabled' = 'true',
    'compaction.schedule.enabled' = 'true',
    'compaction.trigger.strategy' = 'num_commits',
    'compaction.delta_commits' = '5',
    'read.streaming.enabled' = 'true',
    'changelog.enabled' = 'true',
    'read.streaming.skip_compaction' = 'true',
    'hive_sync.enable' = 'true',
    'hive_sync.mode' = 'hms',
    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
    'hive_sync.db' = 'hive_dwd',
    'hive_sync.table' = 'dwd_traffic_page_view'
);
11.7.2数据装载
-- Page-view fact load: flatten page log records, derive a session_id per
-- device (mid), and resolve province via area_code.
insert into hudi_dwd.dwd_traffic_page_view
select
uuid,
province_id,
brand,
channel,
is_new,
model,
mid_id,
operate_system,
user_id,
version_code,
page_item,
page_item_type,
last_page_id,
page_id,
source_type,
-- NOTE(review): ts is epoch millis — confirm from_utc_timestamp accepts a
-- BIGINT millisecond argument in the deployed engine version.
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') view_time,
session_id,
during_time,
ts,
dt
from
(
select
uuid,
common.ar area_code,
common.ba brand,
common.ch channel,
common.is_new is_new,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code,
page.during_time,
page.item page_item,
page.item_type page_item_type,
page.last_page_id,
page.page_id,
page.source_type,
ts,
-- session_id = mid + ts of the most recent session-start event (a page view
-- with no last_page_id).
-- FIX: the window ordered by non-existent column `t` (must be ts), and the
-- IF branches mixed BIGINT and STRING — the session-start ts is now cast to
-- string in both branches.
-- NOTE(review): this relies on last_value carrying the latest non-null value
-- forward — confirm IGNORE NULLS semantics in the target engine.
concat(common.mid,'-',last_value(if(page.last_page_id is null,cast(ts as string),cast(null as string))) over (partition by common.mid order by ts)) session_id,
dt
from hudi_ods.ods_log
where page is not null
)log
left join
-- map area_code to province_id
(
select
id province_id,
area_code
from hudi_ods.ods_base_province
)bp
on log.area_code=bp.area_code;
11.8 用户域用户注册事务事实表dwd_user_register
11.8.1建表语句
-- User domain: registration transaction fact, one row per new user, enriched
-- with the device/channel of the matching 'register' page log. Hudi
-- MERGE_ON_READ table partitioned by dt, upserted on user_id.
CREATE TABLE hudi_dwd.dwd_user_register
(
    `user_id` BIGINT,
    `date_id` STRING,
    `create_time` TIMESTAMP(0),
    `channel` STRING,
    `province_id` BIGINT,
    `version_code` STRING,
    `mid_id` STRING,
    `brand` STRING,
    `model` STRING,
    `operate_system` STRING,
    `ts` BIGINT,
    `dt` STRING
)
PARTITIONED BY (`dt`)
WITH (
    'connector' = 'hudi',
    'path' = 'hdfs://hadoop102:8020/user/hudi/warehouse/dwd/dwd_user_register',
    'table.type' = 'MERGE_ON_READ',
    'hoodie.datasource.write.recordkey.field' = 'user_id',
    'hoodie.datasource.write.precombine.field' = 'ts',
    'write.bucket_assign.tasks' = '1',
    'write.tasks' = '4',
    'compaction.tasks' = '1',
    'compaction.async.enabled' = 'true',
    'compaction.schedule.enabled' = 'true',
    'compaction.trigger.strategy' = 'num_commits',
    'compaction.delta_commits' = '5',
    'read.streaming.enabled' = 'true',
    'changelog.enabled' = 'true',
    'read.streaming.skip_compaction' = 'true',
    'hive_sync.enable' = 'true',
    'hive_sync.mode' = 'hms',
    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
    'hive_sync.db' = 'hive_dwd',
    'hive_sync.table' = 'dwd_user_register'
);
11.8.2数据装载
-- Registration fact load: one row per new user, left-joined to the
-- 'register' page log for device/channel context.
insert into hudi_dwd.dwd_user_register
select
ui.user_id,
date_format(ui.create_time,'yyyy-MM-dd') date_id,
ui.create_time,
log.channel,
bp.province_id,
log.version_code,
log.mid_id,
log.brand,
log.model,
log.operate_system,
-- FIX: `ui.ts` referenced a column the ui subquery does not expose, and the
-- target ts is BIGINT (epoch millis) like the log's ts, not the TIMESTAMP(3)
-- ts of ods_user_info. NOTE(review): rows with no matching log get NULL ts.
log.ts,
date_format(ui.create_time,'yyyy-MM-dd') dt
from
(
select
id user_id,
create_time
from hudi_ods.ods_user_info
)ui
left join
-- register-page log events carrying a user id
(
select
common.ar area_code,
common.ba brand,
common.ch channel,
common.md model,
common.mid mid_id,
common.os operate_system,
common.uid user_id,
common.vc version_code,
ts
from hudi_ods.ods_log
where page.page_id='register'
and common.uid is not null
)log
on cast(ui.user_id as string)=log.user_id
left join
-- map area_code to province_id
(
select
id province_id,
area_code
from hudi_ods.ods_base_province
)bp
on log.area_code=bp.area_code;
11.9 用户域用户登录事务事实表dwd_user_login
11.9.1建表语句
-- DWD transaction fact table (user domain): one row per user login.
-- Stored as a Hudi MERGE_ON_READ table and synced to the Hive metastore
-- as hive_dwd.dwd_user_login.
CREATE TABLE hudi_dwd.dwd_user_login
(
-- NOTE(review): user_id is STRING here (from log common.uid) but BIGINT
-- in dwd_user_register — confirm the divergence is intended.
`user_id` STRING,
`date_id` STRING,
`login_time` STRING,
`channel` STRING,
`province_id` BIGINT,
`version_code` STRING,
`mid_id` STRING,
`brand` STRING,
`model` STRING,
`operate_system` STRING,
-- NOTE(review): declared TIMESTAMP(3) but the load statement inserts the
-- BIGINT ts column from ods_log — confirm; sibling tables use BIGINT.
`ts` TIMESTAMP(3),
`dt` STRING
)
PARTITIONED BY (`dt`)
WITH (
'connector'='hudi',
'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dwd/dwd_user_login',
-- MOR: writes land in row-based delta logs, compacted asynchronously below.
'table.type'='MERGE_ON_READ',
-- NOTE(review): recordkey = user_id means merges keep a single row per
-- user — confirm that is intended for a per-login transaction fact table.
'hoodie.datasource.write.recordkey.field' = 'user_id',
'hoodie.datasource.write.precombine.field' = 'ts',
'write.bucket_assign.tasks'='1',
'write.tasks' = '4',
-- Schedule and run async compaction every 5 delta commits.
'compaction.tasks' = '1',
'compaction.async.enabled' = 'true',
'compaction.schedule.enabled' = 'true',
'compaction.trigger.strategy' = 'num_commits',
'compaction.delta_commits' = '5',
-- Streaming read with changelog retained for downstream DWS consumers.
'read.streaming.enabled' = 'true',
'changelog.enabled' = 'true',
'read.streaming.skip_compaction' = 'true',
-- Mirror table metadata into the Hive metastore for ad-hoc/batch queries.
'hive_sync.enable'='true',
'hive_sync.mode' = 'hms',
'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',
'hive_sync.db'='hive_dwd',
'hive_sync.table'='dwd_user_login'
);
11.9.2数据装载
-- Load dwd_user_login: a "login" is the first page event (rn = 1) of every
-- session that carries a non-null user_id; enrich with the province dim.
insert into hudi_dwd.dwd_user_login
select
user_id,
-- ts comes from ods_log; convert to GMT+8 wall-clock for date/time ids.
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd') date_id,
date_format(from_utc_timestamp(ts,'GMT+8'),'yyyy-MM-dd HH:mm:ss') login_time,
channel,
province_id,
version_code,
mid_id,
brand,
model,
operate_system,
ts,
dt
from
(
-- t3: only the session-opening event of each session survives.
select
user_id,
channel,
area_code,
version_code,
mid_id,
brand,
model,
operate_system,
ts,
dt
from
(
select
user_id,
channel,
area_code,
version_code,
mid_id,
brand,
model,
operate_system,
ts,
dt,
row_number() over (partition by session_id order by ts) rn
from
(
-- t1: page events tagged with a derived session id.
select
common.uid user_id,
common.ch channel,
common.ar area_code,
common.vc version_code,
common.mid mid_id,
common.ba brand,
common.md model,
common.os operate_system,
ts,
-- session_id = mid + ts of the most recent session start
-- (last_page_id is null marks a session-opening page view).
-- FIX: both if() branches must be STRING (ts is BIGINT) and the
-- window must order by ts — "t" was an undefined column.
-- NOTE(review): relies on last_value() skipping NULLs so rows
-- inherit the latest session-start ts — confirm for this dialect.
concat(common.mid,'-',last_value(if(page.last_page_id is null,cast(ts as string),cast(null as string))) over (partition by common.mid order by ts)) session_id,
dt
from hudi_ods.ods_log
where page is not null
)t1
where user_id is not null
)t2
where rn=1
)t3
left join
(
select
id province_id,
area_code
from hudi_ods.ods_base_province
)bp
on t3.area_code=bp.area_code;