一、dws 层介绍
统计各个主题对象的当天行为,服务于 DWT 层的主题宽表,以及一些业务明细数据,应对特殊需求(例如,购买行为,统计商品复购率)。
二、dws 层用户行为数据
2.1 每日设备行为 dws_uv_detail_daycount
1、介绍
每日设备行为,主要按照设备 id 统计。
2、数据来源
dwd_start_log
3、建表
drop table if exists dws_uv_detail_daycount;
create external table dws_uv_detail_daycount (
`mid_id` string COMMENT '设备唯一标识',
`user_id` string COMMENT '用户标识',
`version_code` string COMMENT '程序版本号',
`version_name` string COMMENT '程序版本名',
`lang` string COMMENT '系统语言',
`source` string COMMENT '渠道号',
`os` string COMMENT '安卓系统版本',
`area` string COMMENT '区域',
`model` string COMMENT '手机型号',
`brand` string COMMENT '手机品牌',
`sdk_version` string COMMENT 'sdkVersion',
`gmail` string COMMENT 'gmail',
`height_width` string COMMENT '屏幕宽高',
`app_time` string COMMENT '客户端日志产生时的时间',
`network` string COMMENT '网络模式',
`lng` string COMMENT '经度',
`lat` string COMMENT '纬度',
`login_count` bigint COMMENT '活跃次数'
)
partitioned by(dt string)
stored as parquet
location '/warehouse/gmall/dws/dws_uv_detail_daycount'
tblproperties ("parquet.compression"="lzo");
4、加载数据
insert overwrite table dws_uv_detail_daycount partition(dt='2020-03-10')
select
mid_id,
concat_ws('|', collect_set(user_id)) user_id,
concat_ws('|', collect_set(version_code)) version_code,
concat_ws('|', collect_set(version_name)) version_name,
concat_ws('|', collect_set(lang))lang,
concat_ws('|', collect_set(source)) source,
concat_ws('|', collect_set(os)) os,
concat_ws('|', collect_set(area)) area,
concat_ws('|', collect_set(model)) model,
concat_ws('|', collect_set(brand)) brand,
concat_ws('|', collect_set(sdk_version)) sdk_version,
concat_ws('|', collect_set(gmail)) gmail,
concat_ws('|', collect_set(height_width)) height_width,
concat_ws('|', collect_set(app_time)) app_time,
concat_ws('|', collect_set(network)) network,
concat_ws('|', collect_set(lng)) lng,
concat_ws('|', collect_set(lat)) lat,
count(*) login_count
from
dwd_start_log
where
dt='2020-03-10'
group by
mid_id;
三、dws 层业务数据表
DWS 层的宽表字段,是站在不同维度的视角去看事实表,重点关注事实表的度量值。
3.1 每日会员行为 dws_user_action_daycount
1、数据来源
dwd_start_log、dwd_fact_cart_info、dwd_fact_order_info、dwd_fact_payment_info。
2、建表
drop table if exists dws_user_action_daycount;
create external table dws_user_action_daycount (
user_id string comment '用户 id',
login_count bigint comment '登录次数',
cart_count bigint comment '加入购物车次数',
cart_amount double comment '加入购物车金额',
order_count bigint comment '下单次数',
order_amount decimal(16,2) comment '下单金额',
payment_count bigint comment '支付次数',
payment_amount decimal(16,2) comment '支付金额'
) COMMENT '每日用户行为'
PARTITIONED BY (`dt` string)
stored as parquet
location '/warehouse/gmall/dws/dws_user_action_daycount/'
tblproperties ("parquet.compression"="lzo");
3、加载数据
with
tmp_login as
(
select
user_id,
count(*) login_count
from
dwd_start_log
where dt='2020-03-10' and user_id is not null
group by user_id
),
tmp_cart as
(
select
user_id,
count(*) cart_count,
sum(cart_price*sku_num) cart_amount
from
dwd_fact_cart_info
where dt='2020-03-10' and user_id is not null and date_format(create_time,'yyyy-MM-dd')='2020-03-10'
group by user_id
),
tmp_order as
(
select
user_id,
count(*) order_count,
sum(final_total_amount) order_amount
from
dwd_fact_order_info
where dt='2020-03-10'
group by user_id
) ,
tmp_payment as
(
select
user_id,
count(*