第6章需求三:品牌复购率
6.2 DWS层
6.2.1 用户购买商品明细表(宽表)
hive (gmall)>
-- DWS layer: daily user purchase detail wide table (one row per user/sku/day).
-- Fix: the transcription replaced ASCII quotes with typographic quotes
-- (‘ ’ “ ”), which Hive cannot parse; restored straight quotes throughout.
-- NOTE(review): order_count / order_amount are declared string in the original
-- design even though they hold numbers -- kept as-is for compatibility.
drop table if exists dws_sale_detail_daycount;
create external table dws_sale_detail_daycount
(
    user_id            string        comment '用户 id',
    sku_id             string        comment '商品 Id',
    user_gender        string        comment '用户性别',
    user_age           string        comment '用户年龄',
    user_level         string        comment '用户等级',
    order_price        decimal(10,2) comment '商品价格',
    sku_name           string        comment '商品名称',
    sku_tm_id          string        comment '品牌id',
    sku_category3_id   string        comment '商品三级品类id',
    sku_category2_id   string        comment '商品二级品类id',
    sku_category1_id   string        comment '商品一级品类id',
    sku_category3_name string        comment '商品三级品类名称',
    sku_category2_name string        comment '商品二级品类名称',
    sku_category1_name string        comment '商品一级品类名称',
    spu_id             string        comment '商品 spu',
    sku_num            int           comment '购买个数',
    order_count        string        comment '当日下单单数',
    order_amount       string        comment '当日下单金额'
) comment '用户购买商品明细表'
partitioned by (dt string)
stored as parquet
-- NOTE(review): location says dws_USER_sale_detail_daycount while the table is
-- dws_sale_detail_daycount -- confirm the intended HDFS path; kept as written.
location '/warehouse/gmall/dws/dws_user_sale_detail_daycount/'
tblproperties ("parquet.compression"="snappy");
6.2.2 数据导入
hive (gmall)>
-- Load one day (2019-02-10) of order detail into the DWS wide table.
-- Fixes: extraction dropped both '*' characters -- count() is invalid in Hive
-- (restored count(*)) and sum(od.order_pricesku_num) referenced a nonexistent
-- column (restored od.order_price * sku_num); typographic quotes around the
-- date literals were restored to straight quotes.
with
-- tmp_detail: per (user_id, sku_id) daily totals of quantity, order-line
-- count and amount, aggregated from the DWD order detail fact table.
tmp_detail as
(
    select
        user_id,
        sku_id,
        sum(sku_num) sku_num,
        count(*) order_count,
        sum(od.order_price * sku_num) order_amount
    from dwd_order_detail od
    where od.dt = '2019-02-10'
    group by user_id, sku_id
)
insert overwrite table dws_sale_detail_daycount partition(dt = '2019-02-10')
select
    tmp_detail.user_id,
    tmp_detail.sku_id,
    u.gender,
    -- approximate age in years at the load date
    months_between('2019-02-10', u.birthday) / 12 age,
    u.user_level,
    price,
    sku_name,
    tm_id,
    category3_id,
    category2_id,
    category1_id,
    category3_name,
    category2_name,
    category1_name,
    spu_id,
    tmp_detail.sku_num,
    tmp_detail.order_count,
    tmp_detail.order_amount
from tmp_detail
-- left joins keep every purchase row even when the dimension row is missing
left join dwd_user_info u on tmp_detail.user_id = u.id and u.dt = '2019-02-10'
left join dwd_sku_info  s on tmp_detail.sku_id  = s.id and s.dt = '2019-02-10'
;
6.2.3 数据导入脚本
1)在/home/atguigu/bin目录下创建脚本dws_sale.sh
[atguigu@hadoop102 bin]$ vim dws_sale.sh
在脚本中填写如下内容
#!/bin/bash
# dws_sale.sh [yyyy-mm-dd]
# Loads one day of order detail into the DWS user purchase wide table.
# Reconstructed from a garbled one-character-per-line extraction; the flattened
# copies embedded in the residue confirm the statement text.

# Variables kept here so they are easy to change.
APP=gmall
hive=/opt/module/hive/bin/hive

# Use the date passed as $1; default to yesterday when no argument is given.
# (Restored the space in `date -d "-1 day" +%F` -- the garble fused them.)
if [ -n "$1" ]; then
    do_date=$1
else
    do_date=$(date -d "-1 day" +%F)
fi

sql="
set hive.exec.dynamic.partition.mode=nonstrict;

with
tmp_detail as
(
    select
        user_id,
        sku_id,
        sum(sku_num) sku_num,
        count(*) order_count,
        sum(od.order_price*sku_num) order_amount
    from "$APP".dwd_order_detail od
    where od.dt='$do_date'
    group by user_id, sku_id
)
insert overwrite table "$APP".dws_sale_detail_daycount partition(dt='$do_date')
select
    tmp_detail.user_id,
    tmp_detail.sku_id,
    u.gender,
    months_between('$do_date', u.birthday)/12 age,
    u.user_level,
    price,
    sku_name,
    tm_id,
    category3_id,
    category2_id,
    category1_id,
    category3_name,
    category2_name,
    category1_name,
    spu_id,
    tmp_detail.sku_num,
    tmp_detail.order_count,
    tmp_detail.order_amount
from tmp_detail
left join "$APP".dwd_user_info u on tmp_detail.user_id=u.id and u.dt='$do_date'
left join "$APP".dwd_sku_info s on tmp_detail.sku_id=s.id and s.dt='$do_date';
"

# Run through the pinned hive binary defined above.
$hive -e "$sql"
2)增加脚本执行权限
[atguigu@hadoop102 bin]$ chmod 777 dws_sale.sh
3)执行脚本导入数据
[atguigu@hadoop102 bin]$ dws_sale.sh 2019-02-11
4)查看导入数据
hive (gmall)>
select * from dws_sale_detail_daycount where dt='2019-02-11' limit 2;