Applications of Hudi (4)

Chapter 12 Lakehouse DWS Layer

Design notes:

1) The design of the DWS layer follows the metric system.

2) DWS table names follow the pattern dws_<data domain>_<granularity>_<business process>_<period> (1d/nd/td).

Note: 1d means the most recent 1 day, nd the most recent n days, and td history to date.

3) Aggregations use Flink SQL's built-in aggregate functions such as sum(), so that results can be corrected through retraction (changelog) updates when upstream data changes.

use modules core,hive;
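The use modules statement above assumes the Hive module has already been loaded into the Flink SQL session so that the Hive built-ins used by the loads below (nvl, date_add, if, ...) resolve. A minimal, hedged session-setup sketch; the Hive version is an assumption and must match your environment:

-- Assumed session setup before running the DDL/DML in this chapter
LOAD MODULE hive WITH ('hive-version' = '3.1.2');  -- version is an assumption
USE MODULES core, hive;                            -- same statement as above
SHOW FULL MODULES;                                 -- verify both modules are enabled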

12.1 Last-1-Day Summary Tables

12.1.1 Trade domain, user-SKU granularity, order, last-1-day summary table dws_trade_user_sku_order_1d

1) Table creation DDL

CREATE TABLE hudi_dws.dws_trade_user_sku_order_1d

(

    `user_id`                   BIGINT,

    `sku_id`                    BIGINT,

    `sku_name`                  VARCHAR(200),

    `category1_id`              BIGINT,

    `category1_name`            VARCHAR(10),

    `category2_id`              BIGINT,

    `category2_name`            VARCHAR(200),

    `category3_id`              BIGINT,

    `category3_name`            VARCHAR(200),

    `tm_id`                     BIGINT,

    `tm_name`                   VARCHAR(100),

    `order_count_1d`            BIGINT,

    `order_num_1d`              BIGINT,

    `order_original_amount_1d`  DECIMAL(16, 2),

    `activity_reduce_amount_1d` DECIMAL(16, 2),

    `coupon_reduce_amount_1d`   DECIMAL(16, 2),

    `order_total_amount_1d`     DECIMAL(16, 2),

    `ts`                        TIMESTAMP(3),

    `dt`                        STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dws/dws_trade_user_sku_order_1d',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'user_id,sku_id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dws',

    'hive_sync.table'='dws_trade_user_sku_order_1d'

);

2) Data loading

insert into hudi_dws.dws_trade_user_sku_order_1d

select

    user_id,

    id,

    sku_name,

    category1_id,

    category1_name,

    category2_id,

    category2_name,

    category3_id,

    category3_name,

    tm_id,

    tm_name,

    order_count_1d,

    order_num_1d,

    order_original_amount_1d,

    activity_reduce_amount_1d,

    coupon_reduce_amount_1d,

    order_total_amount_1d,

    LOCALTIMESTAMP as ts,

    dt

from

(

    select

        dt,

        user_id,

        sku_id,

        count(*) order_count_1d,

        sum(sku_num) order_num_1d,

        sum(split_original_amount) order_original_amount_1d,

        sum(nvl(split_activity_amount,0.0)) activity_reduce_amount_1d,

        sum(nvl(split_coupon_amount,0.0)) coupon_reduce_amount_1d,

        sum(split_total_amount) order_total_amount_1d

    from hudi_dwd.dwd_trade_order_detail

    group by dt,user_id,sku_id

)od

left join

(

    select

        id,

        sku_name,

        category1_id,

        category1_name,

        category2_id,

        category2_name,

        category3_id,

        category3_name,

        tm_id,

        tm_name

    from hudi_dim.dim_sku

)sku

on od.sku_id=sku.id;
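After the first load it is worth a quick sanity check of the aggregation. A hedged verification sketch, assuming it runs from a separate batch session; the dynamic OPTIONS hint that disables streaming read is an assumption and requires table.dynamic-table-options.enabled:

-- Sanity check: read a bounded snapshot of the Hudi table and eyeball the per-day totals
select
    dt,
    count(*)                   as row_cnt,
    sum(order_count_1d)        as order_cnt,
    sum(order_total_amount_1d) as total_amt
from hudi_dws.dws_trade_user_sku_order_1d /*+ OPTIONS('read.streaming.enabled' = 'false') */
group by dt;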

12.1.2 Trade domain, user granularity, order, last-1-day summary table dws_trade_user_order_1d

1) Table creation DDL

CREATE TABLE hudi_dws.dws_trade_user_order_1d

(

    `user_id`                   BIGINT,

    `order_count_1d`            BIGINT,

    `order_num_1d`              BIGINT,

    `order_original_amount_1d`  DECIMAL(16, 2),

    `activity_reduce_amount_1d` DECIMAL(16, 2),

    `coupon_reduce_amount_1d`   DECIMAL(16, 2),

    `order_total_amount_1d`     DECIMAL(16, 2),

    `ts`                        TIMESTAMP(3),

    `dt`                        STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dws/dws_trade_user_order_1d',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'user_id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dws',

    'hive_sync.table'='dws_trade_user_order_1d'

);

2) Data loading

insert into hudi_dws.dws_trade_user_order_1d

select

    user_id,

    count(distinct(order_id)),

    sum(sku_num),

    sum(split_original_amount),

    sum(nvl(split_activity_amount,0)),

    sum(nvl(split_coupon_amount,0)),

    sum(split_total_amount),

    LOCALTIMESTAMP as ts,

    dt

from hudi_dwd.dwd_trade_order_detail

group by user_id,dt;

12.1.3 Trade domain, user granularity, cart-add, last-1-day summary table dws_trade_user_cart_add_1d

1) Table creation DDL

CREATE TABLE hudi_dws.dws_trade_user_cart_add_1d

(

    `user_id`           VARCHAR(200),

    `cart_add_count_1d` BIGINT,

    `cart_add_num_1d`   BIGINT,

    `ts`                TIMESTAMP(3),

    `dt`                STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dws/dws_trade_user_cart_add_1d',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'user_id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dws',

    'hive_sync.table'='dws_trade_user_cart_add_1d'

);

2) Data loading

insert into hudi_dws.dws_trade_user_cart_add_1d

select

    user_id,

    count(*),

    sum(sku_num),

    LOCALTIMESTAMP as ts,

    dt

from hudi_dwd.dwd_trade_cart_add

group by user_id,dt;

12.1.4 Trade domain, user granularity, payment, last-1-day summary table dws_trade_user_payment_1d

1) Table creation DDL

CREATE TABLE hudi_dws.dws_trade_user_payment_1d

(

    `user_id`           BIGINT,

    `payment_count_1d`  BIGINT,

    `payment_num_1d`    BIGINT,

    `payment_amount_1d` DECIMAL(16, 2),

    `ts`                TIMESTAMP(3),

    `dt`                STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dws/dws_trade_user_payment_1d',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'user_id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dws',

    'hive_sync.table'='dws_trade_user_payment_1d'

);

2) Data loading

insert into hudi_dws.dws_trade_user_payment_1d

select

    user_id,

    count(distinct(order_id)),

    sum(sku_num),

    sum(split_payment_amount),

    LOCALTIMESTAMP as ts,

    dt

from hudi_dwd.dwd_trade_pay_detail_suc

group by user_id,dt;

12.1.5 Trade domain, province granularity, order, last-1-day summary table dws_trade_province_order_1d

1) Table creation DDL

CREATE TABLE hudi_dws.dws_trade_province_order_1d

(

    `province_id`               INT,

    `province_name`             varchar(20),

    `area_code`                 varchar(20),

    `iso_code`                  varchar(20),

    `iso_3166_2`                varchar(20),

    `order_count_1d`            BIGINT,

    `order_original_amount_1d`  DECIMAL(16, 2),

    `activity_reduce_amount_1d` DECIMAL(16, 2),

    `coupon_reduce_amount_1d`   DECIMAL(16, 2),

    `order_total_amount_1d`     DECIMAL(16, 2),

    `ts`                        TIMESTAMP(3),

    `dt`                        STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dws/dws_trade_province_order_1d',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'province_id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dws',

    'hive_sync.table'='dws_trade_province_order_1d'

);

2) Data loading

insert into hudi_dws.dws_trade_province_order_1d

select

    province_id,

    province_name,

    area_code,

    iso_code,

    iso_3166_2,

    order_count_1d,

    order_original_amount_1d,

    activity_reduce_amount_1d,

    coupon_reduce_amount_1d,

    order_total_amount_1d,

    LOCALTIMESTAMP as ts,

    dt

from

(

    select

        province_id,

        count(distinct(order_id)) order_count_1d,

        sum(split_original_amount) order_original_amount_1d,

        sum(nvl(split_activity_amount,0)) activity_reduce_amount_1d,

        sum(nvl(split_coupon_amount,0)) coupon_reduce_amount_1d,

        sum(split_total_amount) order_total_amount_1d,

        dt

    from hudi_dwd.dwd_trade_order_detail

    group by province_id,dt

)o

left join

(

    select

        id,

        province_name,

        area_code,

        iso_code,

        iso_3166_2

    from hudi_dim.dim_province

)p

on o.province_id=p.id;

12.1.6 Tool domain, user-coupon granularity, coupon usage (payment), last-1-day summary table dws_tool_user_coupon_coupon_used_1d

1) Table creation DDL

CREATE TABLE hudi_dws.dws_tool_user_coupon_coupon_used_1d

(

    `user_id`          BIGINT,

    `coupon_id`        BIGINT,

    `coupon_name`      VARCHAR(100),

    `coupon_type_code` VARCHAR(10),

    `coupon_type_name` VARCHAR(100),

    `benefit_rule`     STRING,

    `used_count_1d`    BIGINT,

    `ts`               TIMESTAMP(3),

    `dt`               STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dws/dws_tool_user_coupon_coupon_used_1d',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'user_id,coupon_id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dws',

    'hive_sync.table'='dws_tool_user_coupon_coupon_used_1d'

);  

2) Data loading

insert into hudi_dws.dws_tool_user_coupon_coupon_used_1d

select

    user_id,

    coupon_id,

    coupon_name,

    coupon_type_code,

    coupon_type_name,

    benefit_rule,

    used_count,

    LOCALTIMESTAMP as ts,

    dt

from

(

    select

        dt,

        user_id,

        coupon_id,

        count(*) used_count

    from hudi_dwd.dwd_tool_coupon_used

    group by dt,user_id,coupon_id

)t1

left join

(

    select

        id,

        coupon_name,

        coupon_type_code,

        coupon_type_name,

        benefit_rule

    from hudi_dim.dim_coupon

)t2

on t1.coupon_id=t2.id;

12.1.7 Interaction domain, SKU granularity, favorites added, last-1-day summary table dws_interaction_sku_favor_add_1d

1) Table creation DDL

CREATE TABLE hudi_dws.dws_interaction_sku_favor_add_1d

(

    `sku_id`             BIGINT,

    `sku_name`           VARCHAR(200),

    `category1_id`       BIGINT,

    `category1_name`     VARCHAR(10),

    `category2_id`       BIGINT,

    `category2_name`     VARCHAR(200),

    `category3_id`       BIGINT,

    `category3_name`     VARCHAR(200),

    `tm_id`              BIGINT,

    `tm_name`            VARCHAR(100),

    `favor_add_count_1d` BIGINT,

    `ts`                 TIMESTAMP(3),

    `dt`                 STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dws/dws_interaction_sku_favor_add_1d',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'sku_id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dws',

    'hive_sync.table'='dws_interaction_sku_favor_add_1d'

);   

2) Data loading

insert into hudi_dws.dws_interaction_sku_favor_add_1d

select

    sku_id,

    sku_name,

    category1_id,

    category1_name,

    category2_id,

    category2_name,

    category3_id,

    category3_name,

    tm_id,

    tm_name,

    favor_add_count,

    LOCALTIMESTAMP as ts,

    dt

from

(

    select

        dt,

        sku_id,

        count(*) favor_add_count

    from hudi_dwd.dwd_interaction_favor_add

    group by dt,sku_id

)favor

left join

(

    select

        id,

        sku_name,

        category1_id,

        category1_name,

        category2_id,

        category2_name,

        category3_id,

        category3_name,

        tm_id,

        tm_name

    from hudi_dim.dim_sku

)sku

on favor.sku_id=sku.id;

12.1.8 Traffic domain, session granularity, page views, last-1-day summary table dws_traffic_session_page_view_1d

1) Table creation DDL

CREATE TABLE hudi_dws.dws_traffic_session_page_view_1d

(

    `session_id`     STRING,

    `mid_id`         STRING,

    `brand`          STRING,

    `model`          STRING,

    `operate_system` STRING,

    `version_code`   STRING,

    `channel`        STRING,

    `during_time_1d` BIGINT,

    `page_count_1d`  BIGINT,

    `ts`             TIMESTAMP(3),

    `dt`             STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dws/dws_traffic_session_page_view_1d',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'session_id,mid_id,brand,model,operate_system,version_code,channel',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dws',

    'hive_sync.table'='dws_traffic_session_page_view_1d'

);

2) Data loading

insert into hudi_dws.dws_traffic_session_page_view_1d

select

    session_id,

    mid_id,

    brand,

    model,

    operate_system,

    version_code,

    channel,

    sum(cast(during_time as bigint)),

    count(*),

    LOCALTIMESTAMP as ts,

    dt

from hudi_dwd.dwd_traffic_page_view

group by dt,session_id,mid_id,brand,model,operate_system,version_code,channel;

12.1.9 Traffic domain, visitor-page granularity, page views, last-1-day summary table dws_traffic_page_visitor_page_view_1d

1) Table creation DDL

CREATE TABLE hudi_dws.dws_traffic_page_visitor_page_view_1d

(

    `mid_id`         STRING,

    `brand`          STRING,

    `model`          STRING,

    `operate_system` STRING,

    `page_id`        STRING,

    `during_time_1d` BIGINT,

    `view_count_1d`  BIGINT,

    `ts`             TIMESTAMP(3),

    `dt`             STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dws/dws_traffic_page_visitor_page_view_1d',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'mid_id,brand,model,operate_system,page_id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dws',

    'hive_sync.table'='dws_traffic_page_visitor_page_view_1d'

);

2) Data loading

insert into hudi_dws.dws_traffic_page_visitor_page_view_1d

select

    mid_id,

    brand,

    model,

    operate_system,

    page_id,

    sum(cast(during_time as bigint)),

    count(*),

    LOCALTIMESTAMP as ts,

    dt

from hudi_dwd.dwd_traffic_page_view

group by dt,mid_id,brand,model,operate_system,page_id;

12.2 Last-n-Day Summary Tables

12.2.1 Trade domain, user-SKU granularity, order, last-n-day summary table dws_trade_user_sku_order_nd

1) Table creation DDL

CREATE TABLE hudi_dws.dws_trade_user_sku_order_nd

(

    `user_id`                    BIGINT,

    `sku_id`                     BIGINT,

    `sku_name`                   VARCHAR(200),

    `category1_id`               BIGINT,

    `category1_name`             VARCHAR(10),

    `category2_id`               BIGINT,

    `category2_name`             VARCHAR(200),

    `category3_id`               BIGINT,

    `category3_name`             VARCHAR(200),

    `tm_id`                      BIGINT,

    `tm_name`                    VARCHAR(100),

    `order_count_7d`             BIGINT,

    `order_num_7d`               BIGINT,

    `order_original_amount_7d`   DECIMAL(38, 2),

    `activity_reduce_amount_7d`  DECIMAL(38, 2),

    `coupon_reduce_amount_7d`    DECIMAL(38, 2),

    `order_total_amount_7d`      DECIMAL(38, 2),

    `order_count_30d`            BIGINT,

    `order_num_30d`              BIGINT,

    `order_original_amount_30d`  DECIMAL(38, 2),

    `activity_reduce_amount_30d` DECIMAL(38, 2),

    `coupon_reduce_amount_30d`   DECIMAL(38, 2),

    `order_total_amount_30d`     DECIMAL(38, 2),

    `ts`                         TIMESTAMP(3),

    `dt`                         STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dws/dws_trade_user_sku_order_nd',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'user_id,sku_id,sku_name,category1_id,category1_name,category2_id,category2_name,category3_id,category3_name,tm_id,tm_name',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dws',

    'hive_sync.table'='dws_trade_user_sku_order_nd'

);

2) Data loading

insert into hudi_dws.dws_trade_user_sku_order_nd

select

    user_id,

    sku_id,

    sku_name,

    category1_id,

    category1_name,

    category2_id,

    category2_name,

    category3_id,

    category3_name,

    tm_id,

    tm_name,

    sum(if(dt>=date_add(CURRENT_DATE,-6),order_count_1d,0)),

    sum(if(dt>=date_add(CURRENT_DATE,-6),order_num_1d,0)),

    sum(if(dt>=date_add(CURRENT_DATE,-6),order_original_amount_1d,0)),

    sum(if(dt>=date_add(CURRENT_DATE,-6),activity_reduce_amount_1d,0)),

    sum(if(dt>=date_add(CURRENT_DATE,-6),coupon_reduce_amount_1d,0)),

    sum(if(dt>=date_add(CURRENT_DATE,-6),order_total_amount_1d,0)),

    sum(order_count_1d),

    sum(order_num_1d),

    sum(order_original_amount_1d),

    sum(activity_reduce_amount_1d),

    sum(coupon_reduce_amount_1d),

    sum(order_total_amount_1d),

    LOCALTIMESTAMP as ts,

    cast(CURRENT_DATE as STRING) dt

from hudi_dws.dws_trade_user_sku_order_1d

where dt >= date_add(CURRENT_DATE,-29)

group by  user_id,sku_id,sku_name,category1_id,category1_name,category2_id,category2_name,category3_id,category3_name,tm_id,tm_name;

12.2.2 Trade domain, province granularity, order, last-n-day summary table dws_trade_province_order_nd

1) Table creation DDL

CREATE TABLE hudi_dws.dws_trade_province_order_nd

(

    `province_id`                INT,

    `province_name`              varchar(20),

    `area_code`                  varchar(20),

    `iso_code`                   varchar(20),

    `iso_3166_2`                 varchar(20),

    `order_count_7d`             BIGINT,

    `order_original_amount_7d`   DECIMAL(38, 2),

    `activity_reduce_amount_7d`  DECIMAL(38, 2),

    `coupon_reduce_amount_7d`    DECIMAL(38, 2),

    `order_total_amount_7d`      DECIMAL(38, 2),

    `order_count_30d`            BIGINT,

    `order_original_amount_30d`  DECIMAL(38, 2),

    `activity_reduce_amount_30d` DECIMAL(38, 2),

    `coupon_reduce_amount_30d`   DECIMAL(38, 2),

    `order_total_amount_30d`     DECIMAL(38, 2),

    `ts`                         TIMESTAMP(3),

    `dt`                         STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dws/dws_trade_province_order_nd',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'province_id,province_name,area_code,iso_code,iso_3166_2',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dws',

    'hive_sync.table'='dws_trade_province_order_nd'

);  

2) Data loading

insert into hudi_dws.dws_trade_province_order_nd

select

    province_id,

    province_name,

    area_code,

    iso_code,

    iso_3166_2,

    sum(if(dt>=date_add(CURRENT_DATE,-6),order_count_1d,0)),

    sum(if(dt>=date_add(CURRENT_DATE,-6),order_original_amount_1d,0)),

    sum(if(dt>=date_add(CURRENT_DATE,-6),activity_reduce_amount_1d,0)),

    sum(if(dt>=date_add(CURRENT_DATE,-6),coupon_reduce_amount_1d,0)),

    sum(if(dt>=date_add(CURRENT_DATE,-6),order_total_amount_1d,0)),

    sum(order_count_1d),

    sum(order_original_amount_1d),

    sum(activity_reduce_amount_1d),

    sum(coupon_reduce_amount_1d),

    sum(order_total_amount_1d),

    LOCALTIMESTAMP as ts,

    cast(CURRENT_DATE as STRING) dt   

from hudi_dws.dws_trade_province_order_1d

where dt>=date_add(CURRENT_DATE,-29)

and dt<=CURRENT_DATE

group by province_id,province_name,area_code,iso_code,iso_3166_2;

12.3 History-to-Date Summary Tables

12.3.1 Trade domain, user granularity, order, history-to-date summary table dws_trade_user_order_td

1) Table creation DDL

CREATE TABLE hudi_dws.dws_trade_user_order_td

(

    `user_id`                   BIGINT,

    `order_date_first`          STRING,

    `order_date_last`           STRING,

    `order_count_td`            BIGINT,

    `order_num_td`              BIGINT,

    `original_amount_td`        DECIMAL(38, 2),

    `activity_reduce_amount_td` DECIMAL(38, 2),

    `coupon_reduce_amount_td`   DECIMAL(38, 2),

    `total_amount_td`           DECIMAL(38, 2),

    `ts`                        TIMESTAMP(3),

    `dt`                        STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dws/dws_trade_user_order_td',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'user_id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dws',

    'hive_sync.table'='dws_trade_user_order_td'

);

2) Data loading

insert into hudi_dws.dws_trade_user_order_td

select

    user_id,

    min(dt) login_date_first,

    max(dt) login_date_last,

    sum(order_count_1d) order_count,

    sum(order_num_1d) order_num,

    sum(order_original_amount_1d) original_amount,

    sum(activity_reduce_amount_1d) activity_reduce_amount,

    sum(coupon_reduce_amount_1d) coupon_reduce_amount,

    sum(order_total_amount_1d) total_amount,

    LOCALTIMESTAMP as ts,

    cast(CURRENT_DATE as STRING) dt

from hudi_dws.dws_trade_user_order_1d

group by user_id;

12.3.2 User domain, user granularity, login, history-to-date summary table dws_user_user_login_td

1) Table creation DDL

CREATE TABLE hudi_dws.dws_user_user_login_td

(

    `user_id`         bigint,

    `login_date_last` STRING,

    `login_count_td`  BIGINT,

    `ts`              TIMESTAMP(3),

    `dt`              STRING

)

PARTITIONED BY (`dt`)

WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/dws/dws_user_user_login_td',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'user_id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_dws',

    'hive_sync.table'='dws_user_user_login_td'

);

2) Data loading

insert into hudi_dws.dws_user_user_login_td

select

    u.id,

    nvl(login_date_last,u.dt),

    nvl(login_count_td,1),

    LOCALTIMESTAMP as ts,

    cast(CURRENT_DATE as STRING) dt

from

(

    select

        id,

        dt

    from hudi_dim.dim_user_info

)u

left join

(

    select

        user_id,

        max(dt) login_date_last,

        count(*) login_count_td

    from hudi_dwd.dwd_user_login

    group by user_id

)l

on cast(u.id as string)=l.user_id;

Chapter 13 Lakehouse ADS Layer

13.1 Traffic Topic

13.1.1 Traffic statistics by channel ads_traffic_stats_by_channel

The requirement is as follows:

Period | Granularity | Metric | Description
Last 1/7/30 days | Channel | Visitor count | Number of distinct visitors
Last 1/7/30 days | Channel | Average session duration | Average dwell time per session
Last 1/7/30 days | Channel | Average pages per session | Average number of pages viewed per session
Last 1/7/30 days | Channel | Session count | Total number of sessions
Last 1/7/30 days | Channel | Bounce rate | Share of sessions that viewed only one page

1) Table creation DDL

CREATE TABLE hudi_ads.ads_traffic_stats_by_channel

(

    `dt`               STRING,

    `recent_days`      INT,

    `channel`          STRING,

    `uv_count`         BIGINT,

    `avg_duration_sec` BIGINT,

    `avg_page_count`   BIGINT,

    `sv_count`         BIGINT,

    `bounce_rate`      DECIMAL(16, 2),

    `ts`               TIMESTAMP(3)

) WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ads/ads_traffic_stats_by_channel',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'dt,recent_days,channel',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ads',

    'hive_sync.table'='ads_traffic_stats_by_channel'

);

2) Data loading

insert into hudi_ads.ads_traffic_stats_by_channel

select

    cast(CURRENT_DATE as STRING) dt,

    recent_days,

    channel,

    cast(count(distinct(mid_id)) as bigint) uv_count,

    cast(avg(during_time_1d)/1000 as bigint) avg_duration_sec,

    cast(avg(page_count_1d) as bigint) avg_page_count,

    cast(count(*) as bigint) sv_count,

    cast(sum(if(page_count_1d=1,1,0))/count(*) as decimal(16,2)) bounce_rate,

    LOCALTIMESTAMP as ts

from

(

select

    dt,

    channel,

    mid_id,

    during_time_1d,

    page_count_1d,

    Array[1,7,30] days

from hudi_dws.dws_traffic_session_page_view_1d

) t  

CROSS JOIN UNNEST(days) tmp(recent_days)

where dt>=date_add(CURRENT_DATE,-recent_days+1)

group by recent_days,channel;
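The 1/7/30-day expansion above relies on the CROSS JOIN UNNEST pattern over an array literal. A standalone illustration with toy values (not project data) showing how each input row is fanned out once per array element:

-- Toy illustration of the CROSS JOIN UNNEST fan-out used above
select v, recent_days
from (
    select v, Array[1, 7, 30] days
    from (values ('a'), ('b')) as src(v)
) t
cross join unnest(days) tmp(recent_days);
-- returns 6 rows: (a,1) (a,7) (a,30) (b,1) (b,7) (b,30)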

13.2 User Topic

13.2.1 User change statistics ads_user_change

This requirement covers two metrics, churned users and returning users, explained below (a worked sketch follows the table).

Metric | Description
Churned users | A previously active user who has not been active recently is considered churned. Here we count users who were active exactly 7 days ago (that day only) but have not been active during the last 7 days.
Returning users | A previously active user who lapsed for a while and is active again today is considered returning. Here we count the total number of returning users.

1) Table creation DDL

CREATE TABLE hudi_ads.ads_user_change

(

    `dt`               STRING,

    `user_churn_count` BIGINT,

    `user_back_count`  BIGINT,

    `ts`               TIMESTAMP(3)

) WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ads/ads_user_change',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'dt',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ads',

    'hive_sync.table'='ads_user_change'

);

2) Data loading

insert into hudi_ads.ads_user_change

select

    churn.dt,

    user_churn_count,

    user_back_count,

    LOCALTIMESTAMP as ts

from

(

    select

        dt,

        count(*) user_churn_count

    from hudi_dws.dws_user_user_login_td

    where login_date_last=date_add(dt,-7)

    group by dt

)churn

join

(

    select

        dt,

        count(*) user_back_count

    from

    (

        select

            dt,

            user_id,

            login_date_last

        from hudi_dws.dws_user_user_login_td
        where dt=cast(CURRENT_DATE as STRING)

    )t1

    join

    (

        select

            user_id,

            login_date_last login_date_previous

        from hudi_dws.dws_user_user_login_td

        where dt=date_add(CURRENT_DATE,-1)

    )t2

    on t1.user_id=t2.user_id

    where datediff(login_date_last,login_date_previous)>=8

    group by dt

)back

on churn.dt=back.dt;

13.2.2 User retention rate ads_user_retention

Retention analysis usually covers both new-user retention and active-user retention.

New-user retention analysis looks at how many of the users added on a given day are active afterwards; active-user retention analysis looks at how many of the users active on a given day are active afterwards.

Retention is an important measure of how much value a product delivers to its users.

Here we compute new-user retention, defined as the ratio of retained users to new users. For example, if 100 users signed up on 2020-06-14 and 80 of them were active one day later (on 2020-06-15), then the 1-day retention count for 2020-06-14 is 80 and the 1-day retention rate is 80%.

The 1-day through 7-day retention rates are required for every day.

1) Table creation DDL

CREATE TABLE hudi_ads.ads_user_retention

(

    `dt`              STRING,

    `create_date`     STRING,

    `retention_day`   INT,

    `retention_count` BIGINT,

    `new_user_count`  BIGINT,

    `retention_rate`  DECIMAL(16, 2),

    `ts`               TIMESTAMP(3)

) WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ads/ads_user_retention',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'dt,create_date',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ads',

    'hive_sync.table'='ads_user_retention'

);

2) Data loading

insert into hudi_ads.ads_user_retention

select

    t2.dt,

    login_date_first create_date,

    datediff(t2.dt,login_date_first) retention_day,

    sum(if(login_date_last=t2.dt,1,0)) retention_count,

    count(*) new_user_count,

    cast(sum(if(login_date_last=t2.dt,1,0))/count(*)*100 as decimal(16,2)) retention_rate,

    LOCALTIMESTAMP as ts

from

(

    select

        dt,

        user_id,

        date_id login_date_first

    from hudi_dwd.dwd_user_register

    where dt>=date_add(CURRENT_DATE,-7)

    and dt<cast(CURRENT_DATE as STRING)

)t1

join

(

    select

        dt,

        user_id,

        login_date_last

    from hudi_dws.dws_user_user_login_td

    where dt=cast(CURRENT_DATE as STRING)

)t2

on t1.user_id=t2.user_id

group by t2.dt,login_date_first;

13.2.3 New and active user statistics ads_user_stats

The requirement is as follows:

Period | Metric | Description
Last 1/7/30 days | New user count |
Last 1/7/30 days | Active user count |

1) Table creation DDL

CREATE TABLE hudi_ads.ads_user_stats

(

    `dt`                STRING,

    `recent_days`       BIGINT,

    `new_user_count`    BIGINT,

    `active_user_count` BIGINT,

    `ts`                TIMESTAMP(3)

) WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ads/ads_user_stats',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'dt,recent_days',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ads',

    'hive_sync.table'='ads_user_stats'

);

2) Data loading

insert into hudi_ads.ads_user_stats

select

    t2.dt,

    t2.recent_days,

    new_user_count,

    active_user_count,

    LOCALTIMESTAMP as ts

from

(

    select

        t1.dt dt,

        recent_days,

        sum(if(login_date_last>=date_add(t1.dt,-recent_days+1),1,0)) active_user_count

    from

    (

        select

            dt,

            login_date_last,

            Array[1,7,30] days

        from hudi_dws.dws_user_user_login_td

    )t1

    CROSS JOIN UNNEST(days) tmp(recent_days)

    group by t1.dt,recent_days

)t2

join

(

    select

        t3.dt,

        recent_days,

        sum(if(date_id>=date_add(t3.dt,-recent_days+1),1,0)) new_user_count

    from

    (

        select

            dt,

            date_id,

            Array[1,7,30] days

        from hudi_dwd.dwd_user_register

    )t3

    CROSS JOIN UNNEST(days) tmp(recent_days)

    group by t3.dt,recent_days

)t4

on t2.recent_days=t4.recent_days

and t2.dt=t4.dt;

13.2.4 User behavior funnel analysis ads_user_action

Funnel analysis is a data-analysis model that reflects, stage by stage, how users convert through a business process from start to finish. Because every stage is laid out, it is immediately clear which stage has a problem.

This requirement counts the number of users at each stage of a complete shopping flow, as follows:

Period | Metric | Description
Last 1 day | Home page visitors |
Last 1 day | Product detail page visitors |
Last 1 day | Users adding to cart |
Last 1 day | Users placing orders |
Last 1 day | Users paying | Users whose payment succeeded

1) Table creation DDL

CREATE TABLE hudi_ads.ads_user_action

(

    `dt`                STRING,

    `home_count`        BIGINT,

    `good_detail_count` BIGINT,

    `cart_count`        BIGINT,

    `order_count`       BIGINT,

    `payment_count`     BIGINT,

    `ts`                TIMESTAMP(3)

) WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ads/ads_user_action',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'dt',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ads',

    'hive_sync.table'='ads_user_action'

);     

2) Data loading

insert into hudi_ads.ads_user_action

select

    page.dt,

    home_count,

    good_detail_count,

    cart_count,

    order_count,

    payment_count,

    LOCALTIMESTAMP as ts

from

(

    select

        dt,

        1 recent_days,

        sum(if(page_id='home',1,0)) home_count,

        sum(if(page_id='good_detail',1,0)) good_detail_count

    from hudi_dws.dws_traffic_page_visitor_page_view_1d

    where page_id in ('home','good_detail')

    group by dt

)page

join

(

    select

        dt,

        1 recent_days,

        count(*) cart_count

    from hudi_dws.dws_trade_user_cart_add_1d

    group by dt

)cart

on page.recent_days=cart.recent_days and page.dt=cart.dt

join

(

    select

        dt,

        1 recent_days,

        count(*) order_count

    from hudi_dws.dws_trade_user_order_1d

    group by dt

)ord

on page.recent_days=ord.recent_days and page.dt=ord.dt

join

(

    select

        dt,

        1 recent_days,

        count(*) payment_count

    from hudi_dws.dws_trade_user_payment_1d

    group by dt

)pay

on page.recent_days=pay.recent_days and page.dt=pay.dt;

13.2.5 New ordering user statistics ads_new_order_user_stats

The requirement is as follows:

Period | Metric | Description
Last 1/7/30 days | New ordering user count |

1) Table creation DDL

CREATE TABLE hudi_ads.ads_new_order_user_stats

(

    `dt`                   STRING,

    `recent_days`          BIGINT,

    `new_order_user_count` BIGINT,

    `ts`                TIMESTAMP(3)

) WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ads/ads_new_order_user_stats',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'dt,recent_days',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ads',

    'hive_sync.table'='ads_new_order_user_stats'

);         

2) Data loading

insert into hudi_ads.ads_new_order_user_stats

select

    dt,

    recent_days,

    sum(if(order_date_first>=date_add(dt,-recent_days+1),1,0)) new_order_user_count,

    LOCALTIMESTAMP as ts

from

(

    select

        dt,

        order_date_first,

        Array[1,7,30] days

    from hudi_dws.dws_trade_user_order_td

) t

CROSS JOIN UNNEST(days) tmp(recent_days)

group by dt,recent_days;

13.3 Product Topic

13.3.1 Repurchase rate by brand over the last 30 days ads_repeat_purchase_by_tm

The requirement is as follows:

Period | Granularity | Metric | Description
Last 30 days | Brand | Repurchase rate | Share of buyers who bought more than once

1) Table creation DDL

CREATE TABLE hudi_ads.ads_repeat_purchase_by_tm

(

    `dt`                STRING,

    `recent_days`       INT,

    `tm_id`             BIGINT,

    `tm_name`           VARCHAR(100),

    `order_repeat_rate` DECIMAL(16, 2),

    `ts`                TIMESTAMP(3)

) WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ads/ads_repeat_purchase_by_tm',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'dt,recent_days,tm_id,tm_name',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ads',

    'hive_sync.table'='ads_repeat_purchase_by_tm'

);  

2) Data loading

insert into hudi_ads.ads_repeat_purchase_by_tm

select

    dt,

    30,

    tm_id,

    tm_name,

    cast(sum(if(order_count>=2,1,0))/sum(if(order_count>=1,1,0)) as decimal(16,2)),

    LOCALTIMESTAMP as ts

from

(

    select

        dt,

        user_id,

        tm_id,

        tm_name,

        sum(order_count_30d) order_count

    from hudi_dws.dws_trade_user_sku_order_nd

    group by dt,user_id, tm_id,tm_name

)t1

group by dt,tm_id,tm_name;

13.3.2 Order statistics by brand ads_order_stats_by_tm

The requirement is as follows:

Period | Granularity | Metric | Description
Last 1/7/30 days | Brand | Order count |
Last 1/7/30 days | Brand | Ordering user count |

1) Table creation DDL

CREATE TABLE hudi_ads.ads_order_stats_by_tm

(

    `dt`                      STRING,

    `recent_days`             BIGINT,

    `tm_id`                   BIGINT,

    `tm_name`                 VARCHAR(100),

    `order_count`             BIGINT,

    `order_user_count`        BIGINT,

    `ts`                      TIMESTAMP(3)

) WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ads/ads_order_stats_by_tm',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'dt,recent_days,tm_id,tm_name',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ads',

    'hive_sync.table'='ads_order_stats_by_tm'

);

2) Data loading

insert into hudi_ads.ads_order_stats_by_tm

select

    dt,

    recent_days,

    tm_id,

    tm_name,

    order_count,

    order_user_count,

    LOCALTIMESTAMP as ts

from

(

    select

        dt,

        1 recent_days,

        tm_id,

        tm_name,

        sum(order_count_1d) order_count,

        count(distinct(user_id)) order_user_count

    from hudi_dws.dws_trade_user_sku_order_1d

    group by dt,tm_id,tm_name

    union all

    select

        dt,

        recent_days,

        tm_id,

        tm_name,

        sum(order_count),

        count(distinct(if(order_count>0,user_id,cast(null as bigint))))

    from

    (

        select

            dt,

            recent_days,

            user_id,

            tm_id,

            tm_name,

            case recent_days

                when 7 then order_count_7d

                when 30 then order_count_30d

            end order_count

        from

        (

            select

                dt,

                user_id,

                tm_id,

                tm_name,

                order_count_7d,

                order_count_30d,

                Array[7,30] days

            from hudi_dws.dws_trade_user_sku_order_nd

        )t1

        CROSS JOIN UNNEST(days) tmp(recent_days)

    )t2

    group by dt,recent_days,tm_id,tm_name

)odr;

13.3.3 Order statistics by category ads_order_stats_by_cate

The requirement is as follows:

Period | Granularity | Metric | Description
Last 1/7/30 days | Category | Order count |
Last 1/7/30 days | Category | Ordering user count |

1) Table creation DDL

CREATE TABLE hudi_ads.ads_order_stats_by_cate

(

    `dt`                      STRING,

    `recent_days`             INT,

    `category1_id`            BIGINT,

    `category1_name`          VARCHAR(10),

    `category2_id`            BIGINT,

    `category2_name`          VARCHAR(200),

    `category3_id`            BIGINT,

    `category3_name`          VARCHAR(200),

    `order_count`             BIGINT,

    `order_user_count`        BIGINT,

    `ts`                      TIMESTAMP(3)

) WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ads/ads_order_stats_by_cate',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'dt,recent_days,category1_id,category1_name,category2_id,category2_name,category3_id,category3_name',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ads',

    'hive_sync.table'='ads_order_stats_by_cate'

);  

2) Data loading

insert into hudi_ads.ads_order_stats_by_cate

select

    dt,

    recent_days,

    category1_id,

    category1_name,

    category2_id,

    category2_name,

    category3_id,

    category3_name,

    order_count,

    order_user_count,

    LOCALTIMESTAMP as ts

from

(

    select

        dt,

        1 recent_days,

        category1_id,

        category1_name,

        category2_id,

        category2_name,

        category3_id,

        category3_name,

        sum(order_count_1d) order_count,

        count(distinct(user_id)) order_user_count

    from hudi_dws.dws_trade_user_sku_order_1d

    group by dt,category1_id,category1_name,category2_id,category2_name,category3_id,category3_name

    union all

    select

        dt,

        recent_days,

        category1_id,

        category1_name,

        category2_id,

        category2_name,

        category3_id,

        category3_name,

        sum(order_count),

        count(distinct(if(order_count>0,user_id,cast(null as bigint))))

    from

    (

        select

            dt,

            recent_days,

            user_id,

            category1_id,

            category1_name,

            category2_id,

            category2_name,

            category3_id,

            category3_name,

            case recent_days

                when 7 then order_count_7d

                when 30 then order_count_30d

            end order_count

        from

        (

            select

                dt,

                user_id,

                category1_id,

                category1_name,

                category2_id,

                category2_name,

                category3_id,

                category3_name,

                order_count_7d,

                order_count_30d,

                Array[7,30] days

            from hudi_dws.dws_trade_user_sku_order_nd

        )

        CROSS JOIN UNNEST(days) tmp(recent_days)

    )t1

    group by dt,recent_days,category1_id,category1_name,category2_id,category2_name,category3_id,category3_name

)odr;

13.3.4 Top 3 SKUs by cart stock per category ads_sku_cart_num_top3_by_cate

1) Table creation DDL

CREATE TABLE hudi_ads.ads_sku_cart_num_top3_by_cate

(

    `dt`             STRING,

    `category1_id`   BIGINT,

    `category1_name` VARCHAR(10),

    `category2_id`   BIGINT,

    `category2_name` VARCHAR(200),

    `category3_id`   BIGINT,

    `category3_name` VARCHAR(200),

    `sku_id`         BIGINT,

    `sku_name`       VARCHAR(200),

    `cart_num`       INT,

    `rk`             BIGINT,

    `ts`             TIMESTAMP(3)

) WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ads/ads_sku_cart_num_top3_by_cate',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'dt,sku_id',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ads',

    'hive_sync.table'='ads_sku_cart_num_top3_by_cate'

);

2) Data loading

insert into hudi_ads.ads_sku_cart_num_top3_by_cate

select

    dt,

    category1_id,

    category1_name,

    category2_id,

    category2_name,

    category3_id,

    category3_name,

    sku_id,

    sku_name,

    cart_num,

    rk,

    LOCALTIMESTAMP as ts

from

(

    select

        cart.dt,

        sku_id,

        sku_name,

        category1_id,

        category1_name,

        category2_id,

        category2_name,

        category3_id,

        category3_name,

        cart_num,

        ROW_NUMBER() over (partition by category1_id,category2_id,category3_id order by cart_num desc) rk

    from

    (

        select

            dt,

            sku_id,

            sum(sku_num) cart_num

        from hudi_dwd.dwd_trade_cart_add

        group by dt,sku_id

    )cart

    left join

    (

        select

            dt,

            id,

            sku_name,

            category1_id,

            category1_name,

            category2_id,

            category2_name,

            category3_id,

            category3_name

        from hudi_dim.dim_sku

    )sku

    on cart.sku_id=sku.id and cart.dt=sku.dt

)t1

where rk<=3;

13.3.5 Top 3 SKUs by favorite count per brand ads_sku_favor_count_top3_by_tm

1) Table creation DDL

CREATE TABLE hudi_ads.ads_sku_favor_count_top3_by_tm

(

    `dt`          STRING,

    `tm_id`       BIGINT,

    `tm_name`     VARCHAR(100),

    `sku_id`      BIGINT,

    `sku_name`    VARCHAR(200),

    `favor_count` BIGINT,

    `rk`          BIGINT,

    `ts`          TIMESTAMP(3)

) WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ads/ads_sku_favor_count_top3_by_tm',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'dt,tm_id,tm_name,sku_id,sku_name,favor_count',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ads',

    'hive_sync.table'='ads_sku_favor_count_top3_by_tm'

);

2) Data loading

insert into hudi_ads.ads_sku_favor_count_top3_by_tm

select

    dt,

    tm_id,

    tm_name,

    sku_id,

    sku_name,

    favor_add_count_1d,

    rk,

    LOCALTIMESTAMP as ts

from

(

    select

        dt,

        tm_id,

        tm_name,

        sku_id,

        sku_name,

        favor_add_count_1d,

        ROW_NUMBER() over (partition by tm_id order by favor_add_count_1d desc) rk

    from hudi_dws.dws_interaction_sku_favor_add_1d

)t1

where rk<=3;

13.4 Transaction Topic

13.4.1 Average interval from order to payment ads_order_to_pay_interval_avg

Requirement: the average interval between order time and payment time for orders completed (paid) within the most recent 1 day.

1) Table creation DDL

CREATE TABLE hudi_ads.ads_order_to_pay_interval_avg

(

    `dt`                        STRING,

    `order_to_pay_interval_avg` BIGINT,

    `ts`                        TIMESTAMP(3)

) WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ads/ads_order_to_pay_interval_avg',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'dt',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ads',

    'hive_sync.table'='ads_order_to_pay_interval_avg'

);

2) Data loading

insert into hudi_ads.ads_order_to_pay_interval_avg

select

    dt,

    cast(avg(to_unix_timestamp(payment_time)-to_unix_timestamp(order_time)) as bigint),

    LOCALTIMESTAMP as ts

from hudi_dwd.dwd_trade_trade_flow_acc

where payment_date_id is not null

group by dt;

13.4.2 Transaction statistics by province ads_order_by_province

The requirement is as follows:

Period | Granularity | Metric | Description
Last 1/7/30 days | Province | Order count |
Last 1/7/30 days | Province | Order amount |

1) Table creation DDL

CREATE TABLE hudi_ads.ads_order_by_province

(

    `dt`                 STRING,

    `recent_days`        INT,

    `province_id`        INT,

    `province_name`      VARCHAR(20),

    `area_code`          VARCHAR(20),

    `iso_code`           VARCHAR(20),

    `iso_code_3166_2`    VARCHAR(20),

    `order_count`        BIGINT,

    `order_total_amount` DECIMAL(38, 2),

    `ts`                 TIMESTAMP(3)

) WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ads/ads_order_by_province',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'dt,recent_days,province_id,province_name,area_code,iso_code,iso_code_3166_2',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ads',

    'hive_sync.table'='ads_order_by_province'

);

2) Data loading

insert into hudi_ads.ads_order_by_province

select

    dt,

    1 recent_days,

    province_id,

    province_name,

    area_code,

    iso_code,

    iso_3166_2,

    order_count_1d,

    order_total_amount_1d,

    LOCALTIMESTAMP as ts

from hudi_dws.dws_trade_province_order_1d

union

select

    dt,

    recent_days,

    province_id,

    province_name,

    area_code,

    iso_code,

    iso_3166_2,

    case recent_days

        when 7 then order_count_7d

        when 30 then order_count_30d

    end order_count,

    case recent_days

        when 7 then order_total_amount_7d

        when 30 then order_total_amount_30d

    end order_total_amount,

    LOCALTIMESTAMP as ts

from

(

    select

        dt,

        province_id,

        province_name,

        area_code,

        iso_code,

        iso_3166_2,

        order_count_7d,

        order_count_30d,

        order_total_amount_7d,

        order_total_amount_30d,

        Array[7,30] days

    from hudi_dws.dws_trade_province_order_nd

) t1

CROSS JOIN UNNEST(days) tmp(recent_days);
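The CROSS JOIN UNNEST step is what fans each nd row, which carries both the 7-day and the 30-day measures, out into one row per recent_days value; the CASE expression then picks the measure that matches the row. Stripped to its essentials, the pattern looks like this (a minimal sketch keeping only the order-count measure):

-- Fan-out sketch: every source row becomes two rows, one with recent_days = 7 and one with recent_days = 30.
select
    dt,
    recent_days,
    case recent_days
        when 7 then order_count_7d
        when 30 then order_count_30d
    end order_count
from
(
    select dt, order_count_7d, order_count_30d, ARRAY[7,30] days
    from hudi_dws.dws_trade_province_order_nd
) t
CROSS JOIN UNNEST(days) tmp(recent_days);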

13.5 Coupon Topic

13.5.1 Coupon Usage Statistics: ads_coupon_stats

The requirements are as follows:

Statistics period     Granularity    Metric          Notes
Last 1 day            Coupon         Usage count     Counted only once paid
Last 1 day            Coupon         User count      Counted only once paid

1) Table creation statement

CREATE TABLE hudi_ads.ads_coupon_stats

(

    `dt`              STRING,

    `coupon_id`       BIGINT,

    `coupon_name`     VARCHAR(100),

    `used_count`      BIGINT,

    `used_user_count` BIGINT,

    `ts`              TIMESTAMP(3)

) WITH (

    'connector'='hudi',

    'path' ='hdfs://hadoop102:8020/user/hudi/warehouse/ads/ads_coupon_stats',

    'table.type'='MERGE_ON_READ',

    'hoodie.datasource.write.recordkey.field' = 'dt,coupon_id,coupon_name',

    'hoodie.datasource.write.precombine.field' = 'ts',

    'write.bucket_assign.tasks'='1',

    'write.tasks' = '4',

    'compaction.tasks' = '1',

    'compaction.async.enabled' = 'true',

    'compaction.schedule.enabled' = 'true',

    'compaction.trigger.strategy' = 'num_commits',

    'compaction.delta_commits' = '5',

    'read.streaming.enabled' = 'true',

    'changelog.enabled' = 'true',

    'read.streaming.skip_compaction' = 'true',

    'hive_sync.enable'='true',

    'hive_sync.mode' = 'hms',

    'hive_sync.metastore.uris' = 'thrift://hadoop102:9083',

    'hive_sync.db'='hive_ads',

    'hive_sync.table'='ads_coupon_stats'

);

2) Data loading

insert into hudi_ads.ads_coupon_stats

select

    dt,

    coupon_id,

    coupon_name,

    cast(sum(used_count_1d) as bigint),

    cast(count(*) as bigint),

    LOCALTIMESTAMP as ts

from hudi_dws.dws_tool_user_coupon_coupon_used_1d

group by dt,coupon_id,coupon_name;
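Note that count(*) yields the number of users here because dws_tool_user_coupon_coupon_used_1d keeps one row per user, coupon and day. If that grain ever changed, an explicit distinct count would be the safer form; a sketch follows (it assumes the DWS table exposes a user_id column, as its name suggests):

-- Grain-independent variant: count distinct users instead of rows.
select
    dt,
    coupon_id,
    coupon_name,
    cast(sum(used_count_1d) as bigint)      used_count,
    cast(count(distinct user_id) as bigint) used_user_count,
    LOCALTIMESTAMP as ts
from hudi_dws.dws_tool_user_coupon_coupon_used_1d
group by dt,coupon_id,coupon_name;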

Chapter 14 Exporting Report Data

To make the data easy for reporting applications to consume, the statistics of each ADS metric need to be exported to a MySQL database.

14.1 Creating the MySQL Database and Tables

14.1.1 Create the database

CREATE DATABASE IF NOT EXISTS gmall_report DEFAULT CHARSET utf8 COLLATE utf8_general_ci;

14.1.2 Create the tables

1) Traffic statistics by channel

DROP TABLE IF EXISTS `ads_traffic_stats_by_channel`;

CREATE TABLE `ads_traffic_stats_by_channel`  (

  `dt` date NOT NULL COMMENT '统计日期',

  `recent_days` bigint(20) NOT NULL COMMENT '最近天数,1:最近1天,7:最近7天,30:最近30天',

  `channel` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '渠道',

  `uv_count` bigint(20) NULL DEFAULT NULL COMMENT '访客人数',

  `avg_duration_sec` bigint(20) NULL DEFAULT NULL COMMENT '会话平均停留时长,单位为秒',

  `avg_page_count` bigint(20) NULL DEFAULT NULL COMMENT '会话平均浏览页面数',

  `sv_count` bigint(20) NULL DEFAULT NULL COMMENT '会话数',

  `bounce_rate` decimal(16, 2) NULL DEFAULT NULL COMMENT '跳出率',

  PRIMARY KEY (`dt`, `recent_days`, `channel`) USING BTREE

) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci COMMENT = '各渠道流量统计' ROW_FORMAT = DYNAMIC;

2) Page path analysis

DROP TABLE IF EXISTS `ads_page_path`;

CREATE TABLE `ads_page_path`  (

  `dt` date NOT NULL COMMENT '统计日期',

  `source` varchar(64) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '跳转起始页面ID',

  `target` varchar(64) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '跳转终到页面ID',

  `path_count` bigint(20) NULL DEFAULT NULL COMMENT '跳转次数',

  PRIMARY KEY (`dt`, `source`, `target`) USING BTREE

) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci COMMENT = '页面浏览路径分析' ROW_FORMAT = DYNAMIC;

3) User churn and return statistics

DROP TABLE IF EXISTS `ads_user_change`;

CREATE TABLE `ads_user_change`  (

  `dt` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '统计日期',

  `user_churn_count` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '流失用户数',

  `user_back_count` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '回流用户数',

  PRIMARY KEY (`dt`) USING BTREE

) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci COMMENT = '用户变动统计' ROW_FORMAT = DYNAMIC;

4) User retention rate

DROP TABLE IF EXISTS `ads_user_retention`;

CREATE TABLE `ads_user_retention`  (

  `dt` date NOT NULL COMMENT '统计日期',

  `create_date` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '用户新增日期',

  `retention_day` int(20) NOT NULL COMMENT '截至当前日期留存天数',

  `retention_count` bigint(20) NULL DEFAULT NULL COMMENT '留存用户数量',

  `new_user_count` bigint(20) NULL DEFAULT NULL COMMENT '新增用户数量',

  `retention_rate` decimal(16, 2) NULL DEFAULT NULL COMMENT '留存率',

  PRIMARY KEY (`dt`, `create_date`, `retention_day`) USING BTREE

) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci COMMENT = '留存率' ROW_FORMAT = DYNAMIC;

5) New and active user statistics

DROP TABLE IF EXISTS `ads_user_stats`;

CREATE TABLE `ads_user_stats`  (

  `dt` date NOT NULL COMMENT '统计日期',

  `recent_days` bigint(20) NOT NULL COMMENT '最近n日,1:最近1日,7:最近7日,30:最近30日',

  `new_user_count` bigint(20) NULL DEFAULT NULL COMMENT '新增用户数',

  `active_user_count` bigint(20) NULL DEFAULT NULL COMMENT '活跃用户数',

  PRIMARY KEY (`dt`, `recent_days`) USING BTREE

) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci COMMENT = '用户新增活跃统计' ROW_FORMAT = DYNAMIC;

6) User behavior funnel analysis

DROP TABLE IF EXISTS `ads_user_action`;

CREATE TABLE `ads_user_action`  (

  `dt` date NOT NULL COMMENT '统计日期',

  `home_count` bigint(20) NULL DEFAULT NULL COMMENT '浏览首页人数',

  `good_detail_count` bigint(20) NULL DEFAULT NULL COMMENT '浏览商品详情页人数',

  `cart_count` bigint(20) NULL DEFAULT NULL COMMENT '加入购物车人数',

  `order_count` bigint(20) NULL DEFAULT NULL COMMENT '下单人数',

  `payment_count` bigint(20) NULL DEFAULT NULL COMMENT '支付人数',

  PRIMARY KEY (`dt`) USING BTREE

) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci COMMENT = '漏斗分析' ROW_FORMAT = DYNAMIC;

7) New ordering user statistics

DROP TABLE IF EXISTS `ads_new_order_user_stats`;

CREATE TABLE `ads_new_order_user_stats`  (

  `dt` varchar(20) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,

  `recent_days` bigint(20) NOT NULL,

  `new_order_user_count` bigint(20) NULL DEFAULT NULL,

  PRIMARY KEY (`recent_days`, `dt`) USING BTREE

) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic;

8) Users who ordered on 3 consecutive days within the last 7 days

DROP TABLE IF EXISTS `ads_order_continuously_user_count`;

CREATE TABLE `ads_order_continuously_user_count`  (

  `dt` varchar(20) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,

  `recent_days` bigint(20) NOT NULL,

  `order_continuously_user_count` bigint(20) NULL DEFAULT NULL,

  PRIMARY KEY (`dt`, `recent_days`) USING BTREE

) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic;

9) Repeat purchase rate by brand over the last 30 days

DROP TABLE IF EXISTS `ads_repeat_purchase_by_tm`;

CREATE TABLE `ads_repeat_purchase_by_tm`  (

  `dt` date NOT NULL COMMENT '统计日期',

  `recent_days` bigint(20) NOT NULL COMMENT '最近天数,30:最近30天',

  `tm_id` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '品牌ID',

  `tm_name` varchar(32) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '品牌名称',

  `order_repeat_rate` decimal(16, 2) NULL DEFAULT NULL COMMENT '复购率',

  PRIMARY KEY (`dt`, `recent_days`, `tm_id`) USING BTREE

) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci COMMENT = '各品牌复购率统计' ROW_FORMAT = DYNAMIC;

10) Order statistics by brand

DROP TABLE IF EXISTS `ads_order_stats_by_tm`;

CREATE TABLE `ads_order_stats_by_tm`  (

  `dt` date NOT NULL COMMENT '统计日期',

  `recent_days` bigint(20) NOT NULL COMMENT '最近天数,1:最近1天,7:最近7天,30:最近30天',

  `tm_id` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '品牌ID',

  `tm_name` varchar(32) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '品牌名称',

  `order_count` bigint(20) NULL DEFAULT NULL COMMENT '订单数',

  `order_user_count` bigint(20) NULL DEFAULT NULL COMMENT '订单人数',

  PRIMARY KEY (`dt`, `recent_days`, `tm_id`) USING BTREE

) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci COMMENT = '各品牌商品交易统计' ROW_FORMAT = DYNAMIC;

11) Order statistics by category

DROP TABLE IF EXISTS `ads_order_stats_by_cate`;

CREATE TABLE `ads_order_stats_by_cate`  (

  `dt` date NOT NULL COMMENT '统计日期',

  `recent_days` bigint(20) NOT NULL COMMENT '最近天数,1:最近1天,7:最近7天,30:最近30天',

  `category1_id` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '一级分类id',

  `category1_name` varchar(64) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '一级分类名称',

  `category2_id` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '二级分类id',

  `category2_name` varchar(64) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '二级分类名称',

  `category3_id` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '三级分类id',

  `category3_name` varchar(64) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '三级分类名称',

  `order_count` bigint(20) NULL DEFAULT NULL COMMENT '订单数',

  `order_user_count` bigint(20) NULL DEFAULT NULL COMMENT '订单人数',

  PRIMARY KEY (`dt`, `recent_days`, `category1_id`, `category2_id`, `category3_id`) USING BTREE

) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci COMMENT = '各分类商品交易统计' ROW_FORMAT = DYNAMIC;

12) Top 3 SKUs by cart quantity per category

DROP TABLE IF EXISTS `ads_sku_cart_num_top3_by_cate`;

CREATE TABLE `ads_sku_cart_num_top3_by_cate`  (

  `dt` date NOT NULL COMMENT '统计日期',

  `category1_id` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '一级分类ID',

  `category1_name` varchar(64) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '一级分类名称',

  `category2_id` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '二级分类ID',

  `category2_name` varchar(64) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '二级分类名称',

  `category3_id` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '三级分类ID',

  `category3_name` varchar(64) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '三级分类名称',

  `sku_id` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '商品id',

  `sku_name` varchar(128) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '商品名称',

  `cart_num` bigint(20) NULL DEFAULT NULL COMMENT '购物车中商品数量',

  `rk` bigint(20) NULL DEFAULT NULL COMMENT '排名',

  PRIMARY KEY (`dt`, `sku_id`, `category1_id`, `category2_id`, `category3_id`) USING BTREE

) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci COMMENT = '各分类商品购物车存量Top3' ROW_FORMAT = DYNAMIC;

13) Top 3 SKUs by favorite count per brand

DROP TABLE IF EXISTS `ads_sku_favor_count_top3_by_tm`;

CREATE TABLE `ads_sku_favor_count_top3_by_tm`  (

  `dt` varchar(20) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,

  `tm_id` varchar(20) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,

  `tm_name` varchar(128) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,

  `sku_id` varchar(20) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,

  `sku_name` varchar(128) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL,

  `favor_count` bigint(20) NULL DEFAULT NULL,

  `rk` bigint(20) NULL DEFAULT NULL,

  PRIMARY KEY (`dt`, `tm_id`, `sku_id`) USING BTREE

) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic;

14) Average order-to-payment interval

DROP TABLE IF EXISTS `ads_order_to_pay_interval_avg`;

CREATE TABLE `ads_order_to_pay_interval_avg`  (

  `dt` varchar(20) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,

  `order_to_pay_interval_avg` bigint(20) NULL DEFAULT NULL,

  PRIMARY KEY (`dt`) USING BTREE

) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic;

15) Trade statistics by province

DROP TABLE IF EXISTS `ads_order_by_province`;

CREATE TABLE `ads_order_by_province`  (

  `dt` date NOT NULL COMMENT '统计日期',

  `recent_days` bigint(20) NOT NULL COMMENT '最近天数,1:最近1天,7:最近7天,30:最近30天',

  `province_id` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL COMMENT '省份ID',

  `province_name` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '省份名称',

  `area_code` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '地区编码',

  `iso_code` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '国际标准地区编码',

  `iso_code_3166_2` varchar(16) CHARACTER SET utf8 COLLATE utf8_general_ci NULL DEFAULT NULL COMMENT '国际标准地区编码',

  `order_count` bigint(20) NULL DEFAULT NULL COMMENT '订单数',

  `order_total_amount` decimal(16, 2) NULL DEFAULT NULL COMMENT '订单金额',

  PRIMARY KEY (`dt`, `recent_days`, `province_id`) USING BTREE

) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci COMMENT = '各地区订单统计' ROW_FORMAT = DYNAMIC;

16) Coupon usage statistics

DROP TABLE IF EXISTS `ads_coupon_stats`;

CREATE TABLE `ads_coupon_stats`  (

  `dt` varchar(20) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,

  `coupon_id` varchar(20) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,

  `coupon_name` varchar(128) CHARACTER SET utf8 COLLATE utf8_general_ci NOT NULL,

  `used_count` bigint(20) NULL DEFAULT NULL,

  `used_user_count` bigint(20) NULL DEFAULT NULL,

  PRIMARY KEY (`dt`, `coupon_id`) USING BTREE

) ENGINE = InnoDB CHARACTER SET = utf8 COLLATE = utf8_general_ci ROW_FORMAT = Dynamic;

14.2 Data Export

DataX is used as the export tool, with HDFSReader as the reader and MySQLWriter as the writer.

14.2.1 Writing the DataX configuration file

A DataX configuration file is needed for each table. Taking ads_traffic_stats_by_channel as an example, the configuration file content is as follows:

{

    "job": {

        "content": [

            {

                "reader": {

                    "name": "hdfsreader",

                    "parameter": {

                        "column": [

                            "*"

                        ],

                        "defaultFS": "hdfs://hadoop102:8020",

                        "encoding": "UTF-8",

                        "fieldDelimiter": "\t",

                        "fileType": "text",

                        "nullFormat": "\\N",

                        "path": "${exportdir}"

                    }

                },

                "writer": {

                    "name": "mysqlwriter",

                    "parameter": {

                        "column": [

                            "dt",

                            "recent_days",

                            "channel",

                            "uv_count",

                            "avg_duration_sec",

                            "avg_page_count",

                            "sv_count",

                            "bounce_rate"

                        ],

                        "connection": [

                            {

                                "jdbcUrl": "jdbc:mysql://hadoop102:3306/gmall_report?useUnicode=true&characterEncoding=utf-8",

                                "table": [

                                    "ads_traffic_stats_by_channel"

                                ]

                            }

                        ],

                        "password": "000000",

                        "username": "root",

                        "writeMode": "replace"

                    }

                }

            }

        ],

        "setting": {

            "errorLimit": {

                "percentage": 0.02,

                "record": 0

            },

            "speed": {

                "channel": 3

            }

        }

    }

}

Note: the export path parameter "path" is not hard-coded; it is passed in dynamically when the job is submitted, via a parameter named exportdir.

14.2.2 DataX configuration file generation script

For convenience, a script that batch-generates the DataX configuration files is provided here; its content and usage are as follows.

1) Create the gen_export_config.py script in the ~/bin directory

[atguigu@hadoop102 bin]$ vim ~/bin/gen_export_config.py

The script content is as follows

# coding=utf-8

import json

import getopt

import os

import sys

import MySQLdb

#MySQL connection settings; adjust to your environment

mysql_host = "hadoop102"

mysql_port = "3306"

mysql_user = "root"

mysql_passwd = "123456"

#HDFS NameNode settings; adjust to your environment

hdfs_nn_host = "hadoop102"

hdfs_nn_port = "8020"

#Output directory for the generated configuration files; adjust as needed

output_path = "/opt/module/datax/job/export"

def get_connection():

    return MySQLdb.connect(host=mysql_host, port=int(mysql_port), user=mysql_user, passwd=mysql_passwd)

def get_mysql_meta(database, table):

    connection = get_connection()

    cursor = connection.cursor()

    sql = "SELECT COLUMN_NAME,DATA_TYPE from information_schema.COLUMNS WHERE TABLE_SCHEMA=%s AND TABLE_NAME=%s ORDER BY ORDINAL_POSITION"

    cursor.execute(sql, [database, table])

    fetchall = cursor.fetchall()

    cursor.close()

    connection.close()

    return fetchall

def get_mysql_columns(database, table):

    return map(lambda x: x[0], get_mysql_meta(database, table))

def generate_json(target_database, target_table):

    job = {

        "job": {

            "setting": {

                "speed": {

                    "channel": 3

                },

                "errorLimit": {

                    "record": 0,

                    "percentage": 0.02

                }

            },

            "content": [{

                "reader": {

                    "name": "hdfsreader",

                    "parameter": {

                        "path": "${exportdir}",

                        "defaultFS": "hdfs://" + hdfs_nn_host + ":" + hdfs_nn_port,

                        "column": ["*"],

                        "fileType": "text",

                        "encoding": "UTF-8",

                        "fieldDelimiter": "\t",

                        "nullFormat": "\\N"

                    }

                },

                "writer": {

                    "name": "mysqlwriter",

                    "parameter": {

                        "writeMode": "replace",

                        "username": mysql_user,

                        "password": mysql_passwd,

                        "column": get_mysql_columns(target_database, target_table),

                        "connection": [

                            {

                                "jdbcUrl":

                                    "jdbc:mysql://" + mysql_host + ":" + mysql_port + "/" + target_database + "?useUnicode=true&characterEncoding=utf-8",

                                "table": [target_table]

                            }

                        ]

                    }

                }

            }]

        }

    }

    if not os.path.exists(output_path):

        os.makedirs(output_path)

    with open(os.path.join(output_path, ".".join([target_database, target_table, "json"])), "w") as f:

        json.dump(job, f)

def main(args):

    target_database = ""

    target_table = ""

    options, arguments = getopt.getopt(args, '-d:-t:', ['targetdb=', 'targettbl='])

    for opt_name, opt_value in options:

        if opt_name in ('-d', '--targetdb'):

            target_database = opt_value

        if opt_name in ('-t', '--targettbl'):

            target_table = opt_value

    generate_json(target_database, target_table)

if __name__ == '__main__':

    main(sys.argv[1:])

Notes:

(1) Install the Python MySQL driver

Since the script accesses the MySQL database from Python, the driver must be installed first:

[atguigu@hadoop102 bin]$ sudo yum install -y MySQL-python

(2) Script usage

python gen_export_config.py -d database -t table

Pass the MySQL database name with -d and the MySQL table name with -t; running the command above generates the DataX export configuration file for that table.

2) Create the gen_export_config.sh script in the ~/bin directory

[atguigu@hadoop102 bin]$ vim ~/bin/gen_export_config.sh

The script content is as follows

#!/bin/bash

python ~/bin/gen_export_config.py -d gmall_report -t ads_coupon_stats

python ~/bin/gen_export_config.py -d gmall_report -t ads_new_order_user_stats

python ~/bin/gen_export_config.py -d gmall_report -t ads_order_by_province

python ~/bin/gen_export_config.py -d gmall_report -t ads_order_continuously_user_count

python ~/bin/gen_export_config.py -d gmall_report -t ads_order_stats_by_cate

python ~/bin/gen_export_config.py -d gmall_report -t ads_order_stats_by_tm

python ~/bin/gen_export_config.py -d gmall_report -t ads_order_to_pay_interval_avg

python ~/bin/gen_export_config.py -d gmall_report -t ads_page_path

python ~/bin/gen_export_config.py -d gmall_report -t ads_repeat_purchase_by_tm

python ~/bin/gen_export_config.py -d gmall_report -t ads_sku_cart_num_top3_by_cate

python ~/bin/gen_export_config.py -d gmall_report -t ads_sku_favor_count_top3_by_tm

python ~/bin/gen_export_config.py -d gmall_report -t ads_traffic_stats_by_channel

python ~/bin/gen_export_config.py -d gmall_report -t ads_user_action

python ~/bin/gen_export_config.py -d gmall_report -t ads_user_change

python ~/bin/gen_export_config.py -d gmall_report -t ads_user_retention

python ~/bin/gen_export_config.py -d gmall_report -t ads_user_stats

3) Grant execute permission to gen_export_config.sh

[atguigu@hadoop102 bin]$ chmod +x ~/bin/gen_export_config.sh

4) Run gen_export_config.sh to generate the configuration files

[atguigu@hadoop102 bin]$ gen_export_config.sh

5) Inspect the generated configuration files

[atguigu@hadoop102 bin]$ ls /opt/module/datax/job/export/

gmall_report.ads_coupon_stats.json                    gmall_report.ads_repeat_purchase_by_tm.json

gmall_report.ads_new_order_user_stats.json            gmall_report.ads_sku_cart_num_top3_by_cate.json

gmall_report.ads_order_by_province.json               gmall_report.ads_sku_favor_count_top3_by_tm.json

gmall_report.ads_order_continuously_user_count.json   gmall_report.ads_traffic_stats_by_channel.json

gmall_report.ads_order_stats_by_cate.json             gmall_report.ads_user_action.json

gmall_report.ads_order_stats_by_tm.json               gmall_report.ads_user_change.json

gmall_report.ads_order_to_pay_interval_avg.json       gmall_report.ads_user_retention.json

gmall_report.ads_page_path.json                       gmall_report.ads_user_stats.json

14.2.3 Testing the generated DataX configuration file

Taking ads_traffic_stats_by_channel as an example, test whether the configuration file generated by the script works.

1) Run the DataX synchronization command

[atguigu@hadoop102 bin]$ python /opt/module/datax/bin/datax.py -p"-Dexportdir=/warehouse/gmall/ads/ads_traffic_stats_by_channel" /opt/module/datax/job/export/gmall_report.ads_traffic_stats_by_channel.json

2) Check the synchronization result

Check whether data appears in the MySQL target table, for example with the query sketched below.
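A quick sanity check might look like the following (a sketch; run from a MySQL client against the gmall_report database on hadoop102):

-- Inspect the most recently exported rows in the target table.
select *
from gmall_report.ads_traffic_stats_by_channel
order by dt desc
limit 10;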

14.2.4 Writing the daily export script

(1) Create hdfs_to_mysql.sh in the /home/atguigu/bin directory on hadoop102

[atguigu@hadoop102 bin]$ vim hdfs_to_mysql.sh

(2) Add the following content

#! /bin/bash

DATAX_HOME=/opt/module/datax

#The DataX export path must not contain empty files; this function removes them

handle_export_path(){

  for i in `hadoop fs -ls -R $1 | awk '{print $8}'`; do

    hadoop fs -test -z $i

    if [[ $? -eq 0 ]]; then

      echo "$i文件大小为0,正在删除"

      hadoop fs -rm -r -f $i

    fi

  done

}

#Data export

export_data() {

  datax_config=$1

  export_dir=$2

  handle_export_path $export_dir

  $DATAX_HOME/bin/datax.py -p"-Dexportdir=$export_dir" $datax_config

}

case $1 in

  "ads_coupon_stats")

    export_data /opt/module/datax/job/export/gmall_report.ads_coupon_stats.json /warehouse/gmall/ads/ads_coupon_stats

  ;;

  "ads_new_order_user_stats")

    export_data /opt/module/datax/job/export/gmall_report.ads_new_order_user_stats.json /warehouse/gmall/ads/ads_new_order_user_stats

  ;;  

  "ads_order_by_province")

    export_data /opt/module/datax/job/export/gmall_report.ads_order_by_province.json /warehouse/gmall/ads/ads_order_by_province

  ;;

  "ads_order_continuously_user_count")

    export_data /opt/module/datax/job/export/gmall_report.ads_order_continuously_user_count.json /warehouse/gmall/ads/ads_order_continuously_user_count

  ;;

  "ads_order_stats_by_cate")

    export_data /opt/module/datax/job/export/gmall_report.ads_order_stats_by_cate.json /warehouse/gmall/ads/ads_order_stats_by_cate

  ;;

  "ads_order_stats_by_tm")

    export_data /opt/module/datax/job/export/gmall_report.ads_order_stats_by_tm.json /warehouse/gmall/ads/ads_order_stats_by_tm

  ;;  

  "ads_order_to_pay_interval_avg")

    export_data /opt/module/datax/job/export/gmall_report.ads_order_to_pay_interval_avg.json /warehouse/gmall/ads/ads_order_to_pay_interval_avg

  ;;

  "ads_page_path")

    export_data /opt/module/datax/job/export/gmall_report.ads_page_path.json /warehouse/gmall/ads/ads_page_path

  ;;

  "ads_repeat_purchase_by_tm")

    export_data /opt/module/datax/job/export/gmall_report.ads_repeat_purchase_by_tm.json /warehouse/gmall/ads/ads_repeat_purchase_by_tm

  ;;

  "ads_sku_cart_num_top3_by_cate")

    export_data /opt/module/datax/job/export/gmall_report.ads_sku_cart_num_top3_by_cate.json /warehouse/gmall/ads/ads_sku_cart_num_top3_by_cate

  ;;  

  "ads_sku_favor_count_top3_by_tm")

    export_data /opt/module/datax/job/export/gmall_report.ads_sku_favor_count_top3_by_tm.json /warehouse/gmall/ads/ads_sku_favor_count_top3_by_tm

  ;;

  "ads_traffic_stats_by_channel")

    export_data /opt/module/datax/job/export/gmall_report.ads_traffic_stats_by_channel.json /warehouse/gmall/ads/ads_traffic_stats_by_channel

  ;;

  "ads_user_action")

    export_data /opt/module/datax/job/export/gmall_report.ads_user_action.json /warehouse/gmall/ads/ads_user_action

  ;;

  "ads_user_change")

    export_data /opt/module/datax/job/export/gmall_report.ads_user_change.json /warehouse/gmall/ads/ads_user_change

  ;;  

  "ads_user_retention")

    export_data /opt/module/datax/job/export/gmall_report.ads_user_retention.json /warehouse/gmall/ads/ads_user_retention

  ;;

  "ads_user_stats")

    export_data /opt/module/datax/job/export/gmall_report.ads_user_stats.json /warehouse/gmall/ads/ads_user_stats

  ;;

  "all")

    export_data /opt/module/datax/job/export/gmall_report.ads_coupon_stats.json /warehouse/gmall/ads/ads_coupon_stats

    export_data /opt/module/datax/job/export/gmall_report.ads_new_order_user_stats.json /warehouse/gmall/ads/ads_new_order_user_stats

    export_data /opt/module/datax/job/export/gmall_report.ads_order_by_province.json /warehouse/gmall/ads/ads_order_by_province

    export_data /opt/module/datax/job/export/gmall_report.ads_order_continuously_user_count.json /warehouse/gmall/ads/ads_order_continuously_user_count

    export_data /opt/module/datax/job/export/gmall_report.ads_order_stats_by_cate.json /warehouse/gmall/ads/ads_order_stats_by_cate

    export_data /opt/module/datax/job/export/gmall_report.ads_order_stats_by_tm.json /warehouse/gmall/ads/ads_order_stats_by_tm

    export_data /opt/module/datax/job/export/gmall_report.ads_order_to_pay_interval_avg.json /warehouse/gmall/ads/ads_order_to_pay_interval_avg

    export_data /opt/module/datax/job/export/gmall_report.ads_page_path.json /warehouse/gmall/ads/ads_page_path

    export_data /opt/module/datax/job/export/gmall_report.ads_repeat_purchase_by_tm.json /warehouse/gmall/ads/ads_repeat_purchase_by_tm

    export_data /opt/module/datax/job/export/gmall_report.ads_sku_cart_num_top3_by_cate.json /warehouse/gmall/ads/ads_sku_cart_num_top3_by_cate

    export_data /opt/module/datax/job/export/gmall_report.ads_sku_favor_count_top3_by_tm.json /warehouse/gmall/ads/ads_sku_favor_count_top3_by_tm

    export_data /opt/module/datax/job/export/gmall_report.ads_traffic_stats_by_channel.json /warehouse/gmall/ads/ads_traffic_stats_by_channel

    export_data /opt/module/datax/job/export/gmall_report.ads_user_action.json /warehouse/gmall/ads/ads_user_action

    export_data /opt/module/datax/job/export/gmall_report.ads_user_change.json /warehouse/gmall/ads/ads_user_change

    export_data /opt/module/datax/job/export/gmall_report.ads_user_retention.json /warehouse/gmall/ads/ads_user_retention

    export_data /opt/module/datax/job/export/gmall_report.ads_user_stats.json /warehouse/gmall/ads/ads_user_stats

  ;;

esac

(3) Grant execute permission to the script

[atguigu@hadoop102 bin]$ chmod +x hdfs_to_mysql.sh

(4) Script usage

[atguigu@hadoop102 bin]$ hdfs_to_mysql.sh all
