create table if not exists user_purchase_detail(
name string ,
date string,
amount int,
ip_address string
)
row format delimited fields terminated by ',';
load data local inpath '/opt/data/user_purchase_detail.txt' overwrite into table user_purchase_detail;
3.指标统计
(1)用户购买明细及上次的购买时间
# SQL:
select name,date,amount,ip_address,
lag(date) over(partition by name order by date) last_date from user_purchase_detail;
# 结果:
name date amount ip_address last_date
lisi 2021-05-10 150 10.10.10.12 NULL
lisi 2021-06-12 234 10.10.10.12 2021-05-10
Semi 2021-02-03 46 10.10.10.9 NULL
Semi 2021-04-06 235 10.10.10.9 2021-02-03
Semi 2021-09-01 500 10.10.10.9 2021-04-06
Semi 2021-09-05 345 10.10.10.9 2021-09-01
Semi 2021-09-08 55 10.10.10.9 2021-09-05
Edward 2021-09-02 3500 10.10.10.10 NULL
Edward 2021-09-04 578 10.10.10.10 2021-09-02
Edward 2021-09-07 78 10.10.10.10 2021-09-04
zhangsan 2021-04-08 783 10.10.10.11 NULL
zhangsan 2021-04-09 123 10.10.10.11 2021-04-08
zhangsan 2021-04-11 456 10.10.10.11 2021-04-09
zhangsan 2021-04-13 99 10.10.10.11 2021-04-11
Time taken: 6.126 seconds, Fetched: 14 row(s)
(2)用户购买明细及下次的购买时间
#SQL:
select name,date,amount,ip_address,
lead(date) over(partition by name order by date) next_date from user_purchase_detail;
#结果:
name date amount ip_address next_date
lisi 2021-05-10 150 10.10.10.12 2021-06-12
lisi 2021-06-12 234 10.10.10.12 NULL
Semi 2021-02-03 46 10.10.10.9 2021-04-06
Semi 2021-04-06 235 10.10.10.9 2021-09-01
Semi 2021-09-01 500 10.10.10.9 2021-09-05
Semi 2021-09-05 345 10.10.10.9 2021-09-08
Semi 2021-09-08 55 10.10.10.9 NULL
Edward 2021-09-02 3500 10.10.10.10 2021-09-04
Edward 2021-09-04 578 10.10.10.10 2021-09-07
Edward 2021-09-07 78 10.10.10.10 NULL
zhangsan 2021-04-08 783 10.10.10.11 2021-04-09
zhangsan 2021-04-09 123 10.10.10.11 2021-04-11
zhangsan 2021-04-11 456 10.10.10.11 2021-04-13
zhangsan 2021-04-13 99 10.10.10.11 NULL
Time taken: 8.26 seconds, Fetched: 14 row(s)
(3)用户购买明细及本月第一次购买的时间
# SQL:
select name,date,amount,ip_address,
first_value(date) over (partition by name,substr(date,6,2) order by date) first_purchase from user_purchase_detail;
# 结果:
name date amount ip_address first_purchase
lisi 2021-05-10 150 10.10.10.12 2021-05-10
lisi 2021-06-12 234 10.10.10.12 2021-06-12
Semi 2021-02-03 46 10.10.10.9 2021-02-03
Semi 2021-04-06 235 10.10.10.9 2021-04-06
Semi 2021-09-01 500 10.10.10.9 2021-09-01
Semi 2021-09-05 345 10.10.10.9 2021-09-01
Semi 2021-09-08 55 10.10.10.9 2021-09-01
Edward 2021-09-02 3500 10.10.10.10 2021-09-02
Edward 2021-09-04 578 10.10.10.10 2021-09-02
Edward 2021-09-07 78 10.10.10.10 2021-09-02
zhangsan 2021-04-08 783 10.10.10.11 2021-04-08
zhangsan 2021-04-09 123 10.10.10.11 2021-04-08
zhangsan 2021-04-11 456 10.10.10.11 2021-04-08
zhangsan 2021-04-13 99 10.10.10.11 2021-04-08
Time taken: 72.55 seconds, Fetched: 14 row(s)
(4)用户购买明细及本月最后一次购买的时间
# SQL:
select name,date,amount,ip_address,
first_value(date) over (partition by name,substr(date,6,2) order by date desc) last_purchase from user_purchase_detail;
# 结果:
name date amount ip_address last_purchase
lisi 2021-05-10 150 10.10.10.12 2021-05-10
lisi 2021-06-12 234 10.10.10.12 2021-06-12
Semi 2021-02-03 46 10.10.10.9 2021-02-03
Semi 2021-04-06 235 10.10.10.9 2021-04-06
Semi 2021-09-08 55 10.10.10.9 2021-09-08
Semi 2021-09-05 345 10.10.10.9 2021-09-08
Semi 2021-09-01 500 10.10.10.9 2021-09-08
Edward 2021-09-07 78 10.10.10.10 2021-09-07
Edward 2021-09-04 578 10.10.10.10 2021-09-07
Edward 2021-09-02 3500 10.10.10.10 2021-09-07
zhangsan 2021-04-13 99 10.10.10.11 2021-04-13
zhangsan 2021-04-11 456 10.10.10.11 2021-04-13
zhangsan 2021-04-09 123 10.10.10.11 2021-04-13
zhangsan 2021-04-08 783 10.10.10.11 2021-04-13
# last_value是默认到当前行,给一个到最后一行的窗口尺寸即可.
select name,date,amount,ip_address,last_value(date) over (partition by name,month(date) order by date ROWS BETWEEN CURRENT ROW AND UNBOUNDED FOLLOWING) last_purchase from user_purchase_detail;
name date amount ip_address last_purchase
lisi 2021-05-10 150 10.10.10.12 2021-05-10
lisi 2021-06-12 234 10.10.10.12 2021-06-12
Semi 2021-02-03 46 10.10.10.9 2021-02-03
Semi 2021-04-06 235 10.10.10.9 2021-04-06
Semi 2021-09-01 500 10.10.10.9 2021-09-08
Semi 2021-09-05 345 10.10.10.9 2021-09-08
Semi 2021-09-08 55 10.10.10.9 2021-09-08
Edward 2021-09-02 3500 10.10.10.10 2021-09-07
Edward 2021-09-04 578 10.10.10.10 2021-09-07
Edward 2021-09-07 78 10.10.10.10 2021-09-07
zhangsan 2021-04-08 783 10.10.10.11 2021-04-13
zhangsan 2021-04-09 123 10.10.10.11 2021-04-13
zhangsan 2021-04-11 456 10.10.10.11 2021-04-13
zhangsan 2021-04-13 99 10.10.10.11 2021-04-13
Time taken: 20.816 seconds, Fetched: 14 row(s)
#SQL:
select name,date,amount,ip_address,sum(amount) over(partition by name order by date) sum_amount from user_purchase_detail;
#结果:
name date amount ip_address sum_amount
lisi 2021-05-10 150 10.10.10.12 150
lisi 2021-06-12 234 10.10.10.12 384
Semi 2021-02-03 46 10.10.10.9 46
Semi 2021-04-06 235 10.10.10.9 281
Semi 2021-09-01 500 10.10.10.9 781
Semi 2021-09-05 345 10.10.10.9 1126
Semi 2021-09-08 55 10.10.10.9 1181
Edward 2021-09-02 3500 10.10.10.10 3500
Edward 2021-09-04 578 10.10.10.10 4078
Edward 2021-09-07 78 10.10.10.10 4156
zhangsan 2021-04-08 783 10.10.10.11 783
zhangsan 2021-04-09 123 10.10.10.11 906
zhangsan 2021-04-11 456 10.10.10.11 1362
zhangsan 2021-04-13 99 10.10.10.11 1461
Time taken: 6.109 seconds, Fetched: 14 row(s)
(7)用户购买明细及最近三次的总额
#思路:
// 第一:分组排序 partition by name order by date
// 第二:计算总额
// 默认 当前行+上一行+下一行
// 如果 上一行为null 则为 当前行+下两行
// 如果 下一行为null 则为 当前行+上两行
#SQL:
select name,date,amount,ip_address,
sum(amount+if(lag_amount == 0,lead2_amount,lag_amount)+if(lead_amount == 0,lag2_amount,lead_amount)) total_count
from
(select name,date,amount,ip_address,
lag(amount,1,0) over(partition by name order by date) lag_amount,
lead(amount,1,0) over(partition by name order by date) lead_amount,
lag(amount,2,0) over(partition by name order by date) lag2_amount,
lead(amount,2,0) over(partition by name order by date) lead2_amount
from user_purchase_detail)
a group by name,date,amount,ip_address;
#结果:
name date amount ip_address total_count
lisi 2021-05-10 150 10.10.10.12 384
lisi 2021-06-12 234 10.10.10.12 384
Semi 2021-02-03 46 10.10.10.9 781
Semi 2021-04-06 235 10.10.10.9 781
Semi 2021-09-01 500 10.10.10.9 1080
Semi 2021-09-05 345 10.10.10.9 900
Semi 2021-09-08 55 10.10.10.9 900
Edward 2021-09-02 3500 10.10.10.10 4156
Edward 2021-09-04 578 10.10.10.10 4156
Edward 2021-09-07 78 10.10.10.10 4156
zhangsan 2021-04-08 783 10.10.10.11 1362
zhangsan 2021-04-09 123 10.10.10.11 1362
zhangsan 2021-04-11 456 10.10.10.11 678
zhangsan 2021-04-13 99 10.10.10.11 678
(8)查询前30%时间的订单信息
#SQL:
select name,date,amount,ip_address from
(select name,date,amount,ip_address,
round(CUME_DIST () over(order by date),2) percent
from user_purchase_detail
) a where percent <=0.3;
#结果:
name date amount ip_address percent
Semi 2021-02-03 46 10.10.10.9 0.07
Semi 2021-04-06 235 10.10.10.9 0.14
zhangsan 2021-04-08 783 10.10.10.11 0.21
zhangsan 2021-04-09 123 10.10.10.11 0.29
Time taken: 4.073 seconds, Fetched: 4 row(s)