/*
一、建表、统计与程序异常分析(60分)
1.1
某公司地区业务有两张门店相关的表(数据见:homework.xlsx): 一张为门店信息表:
表名:area_table
字段内容 字段名 字段类型 长度
店铺id store_id varchar 10
店铺位置 area varchar 20
店长id leader_id int
*/
drop TABLE if EXISTS area_table;
CREATE TABLE area_table(
store_id varchar(10),
area varchar(20),
leader_id int
);
/*
1.2
第二张为门店销售信息表:
表名:store_table
字段内容 字段名 字段类型 长度
店铺id store_id varchar 10
订单id order_id varchar 12
销量 sales_volume int
销售日期 salesdate date
*/
DROP TABLE if EXISTS store_table;
CREATE TABLE store_table(
store_id varchar(10),
order_id varchar(12),
sales_volume int ,
salesdate date
);
/*
2.统计
2.1.统计每日每个店铺的销量
*/
SELECT
store_id,
salesdate,
sum(sales_volume) 销量
FROM store_table
GROUP BY
store_id,
salesdate
;
/*
2.2.统计每日每个区域的销量(10分)
*/
SELECT area,salesdate,sum(sales_volume) 销量
FROM area_table
LEFT JOIN store_table
on area_table.store_id=store_table.store_id
GROUP BY area,salesdate ORDER BY salesdate desc;
SELECT t1.store_id,IFNULL(sales_volumes,0) 'sales_volumes' FROM area_table t1
LEFT JOIN
(
select
a.store_id,
sum(sales_volume) as sales_volumes
from area_table a
left join
store_table b
on a.store_id=b.store_id
where a.area='alpha'
and b.salesdate=DATE_SUB('2020-08-18',INTERVAL 1 DAY)
GROUP BY a.store_id
order by a.store_id) t2 ON t1.store_id=t2.store_id WHERE t1.area='alpha';
WITH t1 as (SELECT store_id,area FROM area_table WHERE area='alpha')
SELECT t1.store_id,IFNULL(sales_volumes,0) 'sales_volumes' FROM t1
LEFT JOIN
(
select
t1.store_id,
sum(sales_volume) as sales_volumes
from t1
left join
store_table b
on t1.store_id=b.store_id
where t1.area='alpha'
and b.salesdate=DATE_SUB('2020-08-18',INTERVAL 1 DAY)
GROUP BY t1.store_id
order by t1.store_id)
t2 ON t1.store_id=t2.store_id WHERE t1.area='alpha'
;
– 请拆解如下语句:
– 本语句为某视频网站公司对于k-pop业务线的各渠道用户、新增用户数据抽取语句
with temp_active_hour_table_kps as
(
select
a0.dt
,product_id
,mkey
,substr(FROM_UNIXTIME(st_time),12,2) as hour
,a0.device_id
from
(select
dt
,product_id
,st_time
,device_id
from kps_dwd.kps_dwd_dd_view_user_active
where dt=’
d
t
1
′
)
a
0
l
e
f
t
j
o
i
n
(
s
e
l
e
c
t
d
t
,
m
k
e
y
,
d
e
v
i
c
e
i
d
f
r
o
m
k
p
s
d
w
d
.
k
p
s
d
w
d
d
d
u
s
e
r
c
h
a
n
n
e
l
s
w
h
e
r
e
d
t
=
′
{dt_1}' ) a0 left join (select dt ,mkey ,device_id from kps_dwd.kps_dwd_dd_user_channels where dt='
dt1′)a0leftjoin(selectdt,mkey,deviceidfromkpsdwd.kpsdwddduserchannelswheredt=′{dt_1}’
) a1
on a0.device_id = a1.device_id
)
select
dt
,product
,product_id
,a1.mkey
,name_cn
,hour
,status
,dau
,new
from
(select
dt
,‘K-pop’ as product
,product_id
,mkey
,hour
,status
,count(distinct a.device_id) as dau
,count(distinct if(b.device_id is not null,a.device_id,null) as new
from
(select
dt
,product_id
,mkey
,hour
,device_id
,‘active’ as status
from temp_active_hour_table_kps
group by dt,mkey,product_id,device_id,hour
union all
select
dt
,product_id
,mkey
,min(hour) as hour
,device_id
,‘first’ as status
from temp_active_hour_table_kps
group by dt,mkey,product_id,device_id
) a
left join
(
select
dt
,device_id
from kps_dwd.kps_dwd_dd_fact_view_new_user
where dt=’${dt_1}’
group by dt,device_id
)b on a.dt=b.dt and a.device_id = b.device_id
group by dt
– 拆解步骤如下,请将下列横线处拆解内容补充完整
– 以下题4个空,每空10分
# 建立数据表
-- use lagou;
create table user_trade
(
user_name varchar(20),
piece int,
price double,
pay_amount double,
goods_category varchar(20),
pay_time date
);
----)
);
-- 需求1: 查询出2019年每月的支付总额和当年累积支付总额
/*
需求1: 查询出2019年每月的支付总额和当年累积支付总额
*/
SELECT * FROM user_trade WHERE YEAR(pay_time)="2019";
SELECT
MONTH,
pay_amount,
sum(t1.pay_amount) over (ORDER BY t1.`month`)
-- 没指定row 从头计算到当前行
FROM
(
SELECT MONTH(t.pay_time) 'month',sum(pay_amount) pay_amount
FROM user_trade as t
WHERE YEAR(pay_time)="2019"
GROUP BY MONTH(t.pay_time))
as t1;
-- 需求2:查询出2018-2019年每月的支付总额和当年累积支付总额
SELECT b.YEAR,b.MONTH ,sum(b.pay_amount) over(PARTITION by b.YEAR ORDER BY b.month)
-- 基于年与月开窗
FROM
(
SELECT YEAR(t.pay_time) YEAR,MONTH(t.pay_time) MONTH,sum(t.pay_amount) pay_amount FROM user_trade as t
GROUP BY YEAR(t.pay_time),month(t.pay_time)) b;
-- 需求3: 查询出2019年每个月的近三月移动平均支付金额
SELECT b.MONTH ,b.pay_amount,AVG(b.pay_amount) over(ORDER BY b.month rows BETWEEN 2 preceding and current row )
-- 基于年与月开窗
FROM
(
SELECT MONTH(t.pay_time) MONTH,sum(t.pay_amount) pay_amount FROM user_trade as t
WHERE YEAR(t.pay_time)='2019'
GROUP BY month(t.pay_time)) b;
-- 需求4: 查询出每四个月的最大月总支付金额
SELECT b.MONTH ,
b.pay_amount,
max(b.pay_amount)
over(ORDER BY b.MONTH rows BETWEEN 3 preceding and current row )
-- 基于年与月开窗
FROM
(
SELECT
left(t.pay_time,7) MONTH,
sum(t.pay_amount) pay_amount
FROM user_trade as t
GROUP BY left(t.pay_time,7)) b;
-- ---------------------------------------------
SELECT
left(t.pay_time,7) MONTH,
sum(t.pay_amount) pay_amount ,
max(sum(t.pay_amount))
over(ORDER BY left(t.pay_time,7) rows BETWEEN 3 preceding and current row )
FROM user_trade as t
GROUP BY left(t.pay_time,7);
-- 需求5: 2020年1月,购买商品品类数的用户排名
SELECT
user_name,
COUNT(DISTINCT goods_category) category_count,
ROW_NUMBER() over(ORDER BY COUNT(DISTINCT goods_category) ) order1, -- 行编号
rank() over(ORDER BY COUNT(DISTINCT goods_category) ) order2,
DENSE_RANK() over(ORDER BY COUNT(DISTINCT goods_category) ) order3
FROM
user_trade
WHERE
LEFT(pay_time,7)='2020-01'
GROUP BY user_name ORDER BY category_count ;
-- 需求6: 查询出将2020年2月的支付用户,按照支付金额分成5组后的结果
SELECT
user_name,
sum( pay_amount ) pay_amount,-- 切片
NTILE( 5 ) over ( ORDER BY sum( pay_amount ) DESC ) LEVEL
FROM
user_trade
WHERE
LEFT ( pay_time, 7 )= "2020-02"
GROUP BY
user_name;
-- 需求7: 查询出2020年支付金额排名前30%的所有用户
SELECT
a.user_name,
a.pay_amount,
LEVEL
FROM
(
SELECT
user_name,
sum( pay_amount ) pay_amount,
NTILE( 10 ) over ( ORDER BY sum( pay_amount ) DESC ) LEVEL
FROM
user_trade
WHERE
YEAR ( pay_time ) = '2020'
GROUP BY
user_name
) a
WHERE
LEVEL IN ( 1, 2, 3 );
-- 需求8: 查询出King和West的时间偏移(前N行)
SELECT
user_name,
pay_time,
lag( pay_time, 1, pay_time ) over ( PARTITION BY user_name ORDER BY pay_time ) lag1,-- 没有传入偏移量,那么默认就是1,找不到的话,此处也没有给默认值,
为
NULL lag( pay_time ) over ( PARTITION BY user_name ORDER BY pay_time ) lag1_s,
lag( pay_time, 2, pay_time ) over ( PARTITION BY user_name ORDER BY pay_time ) lag2,
lag( pay_time, 2 ) over ( PARTITION BY user_name ORDER BY pay_time ) lag2_s
FROM
user_trade
WHERE
user_name IN ( 'King', 'West' );
-- 向下偏移
SELECT user_name,
pay_time,
lead(pay_time,1,pay_time) over(partition by user_name order by
pay_time) lead1,
lead(pay_time) over(partition by user_name order by pay_time)
lead2,
lead(pay_time,2,pay_time) over(partition by user_name order by
pay_time) lead3,
lead(pay_time,2) over(partition by user_name order by pay_time)
lead4
FROM user_trade
WHERE user_name in ('King','West');
--
SELECT count(DISTINCT user_name) FROM
(SELECT
user_name,
pay_time,
LEAD(pay_time,1) over(PARTITION by user_name ORDER BY pay_time) LEAD_time
FROM user_trade) A WHERE A.LEAD_time is not NULL
and DATEDIFF(LEAD_time,pay_time)>100;
SELECT count(DISTINCT user_name) FROM
(SELECT
user_name,
pay_time,
LEAD(pay_time,1) over(PARTITION by user_name ORDER BY pay_time) LEAD_time
FROM user_trade) A WHERE not ISNULL(A.LEAD_time)
and DATEDIFF(LEAD_time,pay_time)>100;
-- 需求11: 查询出每年支付时间间隔最长的用户
SELECT a.user_name,YEAR ,
DENSE_RANK() over(PARTITION by YEAR order dt_c)
FROM(
SELECT
user_name,
YEAR(pay_time) YEAR,
DATEDIFF(LEAD(pay_time,1) over(PARTITION by YEAR(pay_time)ORDER BY pay_time asc) ,pay_time) dt_c
FROM user_trade
) a;
-- -----------------------
SELECT * FROM (
SELECT
user_name,
YEAR,
DATEDIFF(lead_time,pay_time),
RANK() over(PARTITION by YEAR ORDER BY DATEDIFF(lead_time,pay_time) desc) rank_l
FROM(
SELECT
user_name,
YEAR(pay_time) YEAR,
pay_time,
LEAD(pay_time,1) over(PARTITION by user_name,YEAR(pay_time) ORDER BY pay_time asc) lead_time
FROM user_trade
) a
WHERE DATEDIFF(lead_time,pay_time) is not NULL)b WHERE rank_l=1 ;