第一题
u01 2017/1/21 5
u02 2017/1/23 6
u03 2017/1/22 8
u04 2017/1/20 3
u01 2017/1/23 6
u01 2017/2/21 8
u02 2017/1/23 6
u01 2017/2/22 4
创建表
create table action
(userId string,
visitDate string,
visitCount int)
row format delimited fields terminated by "\t";
导入到表中
load data local inpath '/exprot/data/ldl.txt' into table action
select
userid,
date_format(regexp_replace(visitdate,'/','-'),'yyyy-MM') visitdate,
visitcount
from
action;t1
select
userid,
visitdate,
sum(visitcount) sum_count
from
t1
group by userid,visitdate;t2
select
userid,
visitdate,
sum_count,
sum(sum_count) over(partition by userid order by visitdate)
from
(select
userid,
visitdate,
sum(visitcount) sum_count
from
(select
userid,
date_format(regexp_replace(visitdate,'/','-'),'yyyy-MM') visitdate,
visitcount
from
action)t1
group by userid,visitdate)t2;
第2题 京东
有50W个京东店铺,每个顾客访问任何一个店铺的任何一个商品时都会产生一条访问日志,访问日志存储的表名为visit,访客的用户id为user_id,被访问的店铺名称为shop,请统计:
u1 a
u2 b
u1 b
u1 a
u3 c
u4 b
u1 a
u2 c
u5 b
u4 b
u6 c
u2 c
u1 b
u2 a
u2 a
u3 a
u5 a
u5 a
u5 a
建表:
create table visit(user_id string,shop string) row format delimited fields terminated by '\t';
1)每个店铺的UV(访客数)
2)每个店铺访问次数top3的访客信息。输出店铺名称、访客id、访问次数
第一种方法
select
shop,
count(distinct user_id) uv
from
visit
group by
shop;
第二种方法
1.去重
select
shop,
user_id
from
visit
group by
shop,user_id;t1
2.计数
select
shop,
count(*) uv
from
(select
shop,
user_id
from
visit
group by
shop,user_id)t1
group by
shop;
第二问
1.计算每个人访问每个点的总次数
select
shop,
user_id,
count(*) ct
from
visit
group by
shop,user_id;t1
2.针对同一店铺,对方问次数进行逆序排序,并添加rank值
select
shop,
user_id,
ct,
row_number() over(partition by shop order by ct desc) rk
from
(select
shop,
user_id,
count(*) ct
from
visit
group by
shop,user_id)t1;t2
3.去店铺访问前三的用户
select
shop,
user_id,
ct
from
(select
shop,
user_id,
ct,
row_number() over(partition by shop order by ct desc) rk
from
(select
shop,
user_id,
count(*) ct
from
visit
group by
shop,user_id)t1)t2
where
rk<=3;