# 15-天亮大数据系列教程之一道阿里经典数据分析题精解

• 按照hive sql的语法完成下面任务：

create table if not exists expo_detail_table
(
pvid string comment’唯一标识一次请求’,
user_id string comment’用户id’,
item_id string comment’商品id’
)lifecycle 29;

create table if not exists click_detail_table
(
pvid string comment’唯一标识一次请求’,
user_id string comment’用户id’,
item_id string comment’商品id’
)lifecycle 29;

create table if not exists result1_table
(
user_id string,
item_id string,
expo_cnt bigint comment’曝光数量’,
click_cnt bigint comment’点击数量’
)lifecycle 29;

create table if not exists result2_table
(
item_id string,
uv_ctr double comment’uv曝光点击率’,
uv_ctr_rank bigint comment’uv曝光点击率排名’,
pv_ctr double comment’pv曝光点击率’,
pv_ctr_rank bigint comment’pv曝光点击率排名’
)lifecycle 29;

• 答案：

1、数据准备
–测试插入表数据
insert into table expo_detail_table values(‘p001’,’user001’,’item001’);
insert into table expo_detail_table values(‘p002’,’user001’,’item001’);
insert into table expo_detail_table values(‘p003’,’user001’,’item002’);
insert into table expo_detail_table values(‘p004’,’user002’,’item002’);
insert into table expo_detail_table values(‘p005’,’user002’,’item003’);
insert into table expo_detail_table values(‘p006’,’user003’,’item004’);

–测试插入表数据
insert into table click_detail_table values(‘c001’,’user001’,’item001’);
insert into table click_detail_table values(‘c002’,’user001’,’item001’);
insert into table click_detail_table values(‘c003’,’user002’,’item002’);

2、脚本代码

insert overwrite table
result1_table
select
coalesce(expo.user_id, click.user_id) as user_id,
coalesce(expo.item_id, click.item_id) as item_id,
coalesce(expo.cnt, 0) as expo_cnt,
coalesce(click.cnt, 0) as click_cnt
from
(
select
user_id,
item_id,
count(1) as cnt
from
expo_detail_table
group by
user_id,
item_id
) expo full outer
join (
select
user_id,
item_id,
count(1) as cnt
from
click_detail_table
group by
user_id,
item_id
) click on expo.user_id = click.user_id
and expo.item_id = click.item_id

insert overwrite table result2_table
select
item_id,
uv_ctr,
dense_rank() OVER(ORDER BY uv_ctr DESC) uv_ctr_rank,
pv_ctr,
dense_rank() OVER(ORDER BY pv_ctr DESC) pv_ctr_rank
from
(
select item_id,
sum(if(click_cnt>0,1,0))/sum(if(expo_cnt>0,1,0)) as uv_ctr,
sum(click_cnt)/sum(expo_cnt) as pv_ctr
from result1_table group by item_id
) middle

3、执行结果

user001 item001 2 2
user001 item002 1 0
user002 item002 1 1
user002 item003 1 0
user003 item004 1 0

item001 1.0 1 1.0 1
item002 0.5 2 0.5 2
item003 0.0 3 0.0 3
item004 0.0 3 0.0 3

• 广告
• 抄袭
• 版权
• 政治
• 色情
• 无意义
• 其他

120