四、数据仓库电商项目——DWS层

DWS层

dws层的数据从dwd层来选取,在建模上进行主题分类,分类后每个主题都制作一张表,按照dwd层的数据,进行每日的轻度聚合,一般是建宽表。

  说明:本次DWS层的数据清洗是与DWT(主题宽表层)相对应的,区别在于DWS层是针对于每日进行统计的,而DWT层则是针对全部进行统计。

DWT层每日设备行为每日会员行为每日商品行为每日优惠券统计 (预留)每日活动统计(预留)每日购买行为
DWT层设备主题宽表会员主题宽表商品主题宽表优惠券主题宽表活动主题宽表会员主题(ADS层)

DWS层每日设备行为(用户行为)

drop table if exists dws_uv_detail_daycount;
create external table dws_uv_detail_daycount
(
    `mid_id` string COMMENT '设备唯一标识',
    `user_id` string COMMENT '用户标识',
    `version_code` string COMMENT '程序版本号', 
    `version_name` string COMMENT '程序版本名', 
    `lang` string COMMENT '系统语言', 
    `source` string COMMENT '渠道号', 
    `os` string COMMENT '安卓系统版本', 
    `area` string COMMENT '区域', 
    `model` string COMMENT '手机型号', 
    `brand` string COMMENT '手机品牌', 
    `sdk_version` string COMMENT 'sdkVersion', 
    `gmail` string COMMENT 'gmail', 
    `height_width` string COMMENT '屏幕宽高',
    `app_time` string COMMENT '客户端日志产生时的时间',
    `network` string COMMENT '网络模式',
    `lng` string COMMENT '经度',
    `lat` string COMMENT '纬度',
    `login_count` bigint COMMENT '活跃次数'
)
partitioned by(dt string)
stored as parquet
location '/warehouse/gmall/dws/dws_uv_detail_daycount'
TBLPROPERTIES('parquet.compression'='lzo');

 
insert overwrite table dws_uv_detail_daycount partition(dt='2020-03-29')
select  
    mid_id,
    concat_ws('|', collect_set(user_id)) user_id,
    concat_ws('|', collect_set(version_code)) version_code,
    concat_ws('|', collect_set(version_name)) version_name,
    concat_ws('|', collect_set(lang))lang,
    concat_ws('|', collect_set(source)) source,
    concat_ws('|', collect_set(os)) os,
    concat_ws('|', collect_set(area)) area, 
    concat_ws('|', collect_set(model)) model,
    concat_ws('|', collect_set(brand)) brand,
    concat_ws('|', collect_set(sdk_version)) sdk_version,
    concat_ws('|', collect_set(gmail)) gmail,
    concat_ws('|', collect_set(height_width)) height_width,
    concat_ws('|', collect_set(app_time)) app_time,
    concat_ws('|', collect_set(network)) network,
    concat_ws('|', collect_set(lng)) lng,
    concat_ws('|', collect_set(lat)) lat,
    count(*) login_count
from dwd_start_log
where dt='2020-03-29'
group by mid_id;

DWS层(业务)

每日用户行为 

 drop table if exists dws_user_action_daycount;
create external table dws_user_action_daycount
(   
    user_id string comment '用户 id',
    login_count bigint comment '登录次数',
    cart_count bigint comment '加入购物车次数',
    cart_amount double comment '加入购物车金额',
    order_count bigint comment '下单次数',
    order_amount    decimal(16,2)  comment '下单金额',
    payment_count   bigint      comment '支付次数',
    payment_amount  decimal(16,2) comment '支付金额'
) COMMENT '每日用户行为'
PARTITIONED BY (`dt` string)
stored as parquet
location '/warehouse/gmall/dws/dws_user_action_daycount/'
tblproperties ("parquet.compression"="lzo");


WITH tmp_login AS (
		SELECT user_id, count(*) AS login_count
		FROM dwd_start_log
		WHERE dt = '2020-03-29'
			AND user_id IS NOT NULL
		GROUP BY user_id
	), 
	tmp_cart AS (
		SELECT user_id, count(*) AS cart_count
			, sum(cart_price * sku_num) AS cart_amount
		FROM dwd_fact_cart_info
		WHERE dt = '2020-03-29'
			AND user_id IS NOT NULL
			AND date_format(create_time, 'yyyy-MM-dd') = '2020-03-29'
		GROUP BY user_id
	), 
	tmp_order AS (
		SELECT user_id, count(*) AS order_count, sum(final_total_amount) AS order_amount
		FROM dwd_fact_order_info
		WHERE dt = '2020-03-29'
		GROUP BY user_id
	), 
	tmp_payment AS (
		SELECT user_id, count(*) AS payment_count, sum(payment_amount) AS payment_amount
		FROM dwd_fact_payment_info
		WHERE dt = '2020-03-29'
		GROUP BY user_id
	)
INSERT OVERWRITE INTO dws_user_action_daycount PARTITION (dt='2020-03-29')
SELECT user_actions.user_id, sum(user_actions.login_count), sum(user_actions.cart_count)
	, sum(user_actions.cart_amount), sum(user_actions.order_count)
	, sum(user_actions.order_amount), sum(user_actions.payment_count)
	, sum(user_actions.payment_amount)
FROM (
	SELECT user_id, login_count, 0 AS cart_count, 0 AS cart_amount, 0 AS order_count
		, 0 AS order_amount, 0 AS payment_count, 0 AS payment_amount
	FROM tmp_login
	UNION ALL
	SELECT user_id, 0 AS login_count, cart_count, cart_amount, 0 AS order_count
		, 0 AS order_amount, 0 AS payment_count, 0 AS payment_amount
	FROM tmp_cart
	UNION ALL
	SELECT user_id, 0 AS login_count, 0 AS cart_count, 0 AS cart_amount, order_count
		, order_amount, 0 AS payment_count, 0 AS payment_amount
	FROM tmp_order
	UNION ALL
	SELECT user_id, 0 AS login_count, 0 AS cart_count, 0 AS cart_amount, 0 AS order_count
		, 0 AS order_amount, payment_count, payment_amount
	FROM tmp_payment
) user_actions
GROUP BY user_id;

每日商品行为

 drop table if exists dws_sku_action_daycount;
create external table dws_sku_action_daycount 
(   
    sku_id string comment 'sku_id',
    order_count bigint comment '被下单次数',
    order_num bigint comment '被下单件数',
    order_amount decimal(16,2) comment '被下单金额',
    payment_count bigint  comment '被支付次数',
    payment_num bigint comment '被支付件数',
    payment_amount decimal(16,2) comment '被支付金额',
    refund_count bigint  comment '被退款次数',
    refund_num bigint comment '被退款件数',
    refund_amount  decimal(16,2) comment '被退款金额',
    cart_count bigint comment '被加入购物车次数',
    cart_num bigint comment '被加入购物车件数',
    favor_count bigint comment '被收藏次数',
    appraise_good_count bigint comment '好评数',
    appraise_mid_count bigint comment '中评数',
    appraise_bad_count bigint comment '差评数',
    appraise_default_count bigint comment '默认评价数'
) COMMENT '每日商品行为'
PARTITIONED BY (`dt` string)
stored as parquet
location '/warehouse/gmall/dws/dws_sku_action_daycount/'
tblproperties ("parquet.compression"="lzo");

 

 每日地区统计

drop table if exists dws_area_stats_daycount;
create external table dws_area_stats_daycount(
`id` bigint COMMENT '编号',
`province_name` string COMMENT '省份名称',
`area_code` string COMMENT '地区编码',
`iso_code` string COMMENT 'iso 编码',
`region_id` string COMMENT '地区 ID',
`region_name` string COMMENT '地区名称',
`login_count` string COMMENT '活跃设备数',
`order_count` bigint COMMENT '下单次数',
`order_amount` decimal(20,2) COMMENT '下单金额',
`payment_count` bigint COMMENT '支付次数',
`payment_amount` decimal(20,2) COMMENT '支付金额'
) COMMENT '每日地区统计表'
PARTITIONED BY (`dt` string)
stored as parquet
location '/warehouse/gmall/dws/dws_area_stats_daycount/'
tblproperties ("parquet.compression"="lzo");
with
tmp_login as
(
select
area_code,
count(*) login_count
from dwd_start_log
where dt='2020-06-14'
group by area_code
),
tmp_op as
(
select
province_id,
sum(if(date_format(create_time,'yyyy-MM-dd')='2020-06-14',1,0))
order_count,
sum(if(date_format(create_time,'yyyy-MM-dd')='2020-06-14',final_total_amount,
0)) order_amount,
sum(if(date_format(payment_time,'yyyy-MM-dd')='2020-06-14',1,0))
payment_count,
sum(if(date_format(payment_time,'yyyy-MM-dd')='2020-06-14',final_total_amoun
t,0)) payment_amount
from dwd_fact_order_info
where (dt='2020-06-14' or dt=date_add('2020-06-14',-1))
group by province_id
)
insert overwrite table dws_area_stats_daycount partition(dt='2020-06-14')
select
pro.id,
pro.province_name,
pro.area_code,
pro.iso_code,
pro.region_id,
pro.region_name,
nvl(tmp_login.login_count,0),
nvl(tmp_op.order_count,0),
nvl(tmp_op.order_amount,0.0),
nvl(tmp_op.payment_count,0),
nvl(tmp_op.payment_amount,0.0)
from dwd_dim_base_province pro
left join tmp_login on pro.area_code=tmp_login.area_code
left join tmp_op on pro.id=tmp_op.province_id;

 

每日用户购买行为

 drop table if exists dws_sale_detail_daycount;
create external table dws_sale_detail_daycount
(   
    user_id   string  comment '用户 id',
    sku_id    string comment '商品 id',
    user_gender  string comment '用户性别',
    user_age string  comment '用户年龄',
    user_level string comment '用户等级',
    order_price decimal(10,2) comment '商品价格',
    sku_name string   comment '商品名称',
    sku_tm_id string   comment '品牌id',
    sku_category3_id string comment '商品三级品类id',
    sku_category2_id string comment '商品二级品类id',
    sku_category1_id string comment '商品一级品类id',
    sku_category3_name string comment '商品三级品类名称',
    sku_category2_name string comment '商品二级品类名称',
    sku_category1_name string comment '商品一级品类名称',
    spu_id  string comment '商品 spu',
    sku_num  int comment '购买个数',
    order_count bigint comment '当日下单单数',
    order_amount decimal(16,2) comment '当日下单金额'
) COMMENT '每日购买行为'
PARTITIONED BY (`dt` string)
stored as parquet
location '/warehouse/gmall/dws/dws_sale_detail_daycount/'
tblproperties ("parquet.compression"="lzo");


INSERT OVERWRITE INTO dws_sale_detail_daycount PARTITION (dt='2020-03-29')
SELECT op.user_id, op.sku_id, ui.gender
	, months_between('2020-03-29', ui.birthday) / 12 AS age
	, ui.user_level, si.price, si.sku_name, si.tm_id, si.category3_id
	, si.category2_id, si.category1_id, si.category3_name, si.category2_name, si.category1_name
	, si.spu_id, op.sku_num, op.order_count, op.order_amount
FROM (
	SELECT user_id, sku_id, sum(sku_num) AS sku_num
		, count(*) AS order_count, sum(total_amount) AS order_amount
	FROM dwd_fact_order_detail
	WHERE dt = '2020-03-29'
	GROUP BY user_id, sku_id
) op
	JOIN (
		SELECT *
		FROM dwd_dim_user_info_his
		WHERE end_date = '9999-99-99'
	) ui
	ON op.user_id = ui.id
	JOIN (
		SELECT *
		FROM dwd_dim_sku_info
		WHERE dt = '2020-03-29'
	) si
	ON op.sku_id = si.id;

每日优惠券统计(预留)

 drop table if exists dws_coupon_use_daycount;
create external table dws_coupon_use_daycount
(   
    `coupon_id` string  COMMENT '优惠券ID',
    `coupon_name` string COMMENT '购物券名称',
    `coupon_type` string COMMENT '购物券类型 1 现金券 2 折扣券 3 满减券 4 满件打折券',
    `condition_amount` string COMMENT '满额数',
    `condition_num` string COMMENT '满件数',
    `activity_id` string COMMENT '活动编号',
    `benefit_amount` string COMMENT '减金额',
    `benefit_discount` string COMMENT '折扣',
    `create_time` string COMMENT '创建时间',
    `range_type` string COMMENT '范围类型 1、商品 2、品类 3、品牌',
    `spu_id` string COMMENT '商品id',
    `tm_id` string COMMENT '品牌id',
    `category3_id` string COMMENT '品类id',
    `limit_num` string COMMENT '最多领用次数',
    `get_count` bigint COMMENT '领用次数',
    `using_count` bigint COMMENT '使用(下单)次数',
    `used_count` bigint COMMENT '使用(支付)次数'
) COMMENT '每日优惠券统计'
PARTITIONED BY (`dt` string)
stored as parquet
location '/warehouse/gmall/dws/dws_coupon_use_daycount/'
tblproperties ("parquet.compression"="lzo");

 

 

  • 1
    点赞
  • 4
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

四月天03

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值