SELECT t2.userid,
t2.visitmonth,
subtotal_visit_cnt,
sum(subtotal_visit_cnt) over (partition BY userid
ORDER BY visitmonth) AS total_visit_cnt
FROM
(SELECT userid,
visitmonth,
sum(visitcount) AS subtotal_visit_cnt
FROM
(SELECT userid,
date_format(regexp_replace(visitdate,'/','-'),'yyyy-MM') AS visitmonth,
visitcount
FROM test_sql.test1) t1
GROUP BY userid,
visitmonth)t2
ORDER BY t2.userid,
t2.visitmonth
第二题
需求
有50W个京东店铺,每个顾客访客访问任何一个店铺的任何一个商品时都会产生一条访问日志,
访问日志存储的表名为Visit,访客的用户id为user_id,被访问的店铺名称为shop,数据如下:
u1 a
u2 b
u1 b
u1 a
u3 c
u4 b
u1 a
u2 c
u5 b
u4 b
u6 c
u2 c
u1 b
u2 a
u2 a
u3 a
u5 a
u5 a
u5 a
请统计:
(1)每个店铺的UV(访客数)
(2)每个店铺访问次数top3的访客信息。输出店铺名称、访客id、访问次数
(1)方式1:
SELECT shop,
count(DISTINCT user_id)
FROM test_sql.test2
GROUP BY shop
方式2:
SELECT t.shop,
count(*)
FROM
(SELECT user_id,
shop
FROM test_sql.test2
GROUP BY user_id,
shop) t
GROUP BY t.shop
(2)
SELECT t2.shop,
t2.user_id,
t2.cnt
FROM
(SELECT t1.*,
row_number() over(partition BY t1.shop
ORDER BY t1.cnt DESC) rank
FROM
(SELECT user_id,
shop,
count(*) AS cnt
FROM test_sql.test2
GROUP BY user_id,
shop) t1)t2
WHERE rank <= 3