1.从订单信息表(order_info)中查询首次下单后第二天仍然下单的用户占所有下单用户的比例,结果保留一位小数,使用百分数显示,期望结果如下:
2.所用到的表和数据
--订单信息表
create table order_info
(
`order_id` string COMMENT '订单id',
`user_id` string COMMENT '用户id',
`create_date` string COMMENT '下单日期',
`total_amount` decimal(16, 2) COMMENT '订单总金额'
) COMMENT '订单表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
--数据装载
insert overwrite table order_info
values ('1', '101', '2021-09-27', 29000.00),
('2', '101', '2021-09-28', 70500.00),
('3', '101', '2021-09-29', 43300.00),
('4', '101', '2021-09-30', 860.00),
('5', '102', '2021-10-01', 46180.00),
('6', '102', '2021-10-01', 50000.00),
('7', '102', '2021-10-01', 75500.00),
('8', '102', '2021-10-02', 6170.00),
('9', '103', '2021-10-02', 18580.00),
('10', '103', '2021-10-02', 28000.00),
('11', '103', '2021-10-02', 23400.00),
('12', '103', '2021-10-03', 5910.00),
('13', '104', '2021-10-03', 13000.00),
('14', '104', '2021-10-03', 69500.00),
('15', '104', '2021-10-03', 2000.00),
('16', '104', '2021-10-03', 5380.00),
('17', '105', '2021-10-04', 6210.00),
('18', '105', '2021-10-04', 68000.00),
('19', '105', '2021-10-04', 43100.00),
('20', '105', '2021-10-04', 2790.00),
('21', '106', '2021-10-04', 9390.00),
('22', '106', '2021-10-05', 58000.00),
('23', '106', '2021-10-05', 46600.00),
('24', '106', '2021-10-05', 5160.00),
('25', '107', '2021-10-05', 55350.00),
('26', '107', '2021-10-05', 14500.00),
('27', '107', '2021-10-06', 47400.00),
('28', '107', '2021-10-06', 6900.00),
('29', '108', '2021-10-06', 56570.00),
('30', '108', '2021-10-06', 44500.00),
('31', '108', '2021-10-07', 50800.00),
('32', '108', '2021-10-07', 3900.00),
('33', '109', '2021-10-07', 41480.00),
('34', '109', '2021-10-07', 88000.00),
('35', '109', '2020-10-08', 15000.00),
('36', '109', '2020-10-08', 9020.00),
('37', '1010', '2020-10-08', 9260.00),
('38', '1010', '2020-10-08', 12000.00),
('39', '1010', '2020-10-08', 23900.00),
('40', '1010', '2020-10-08', 6790.00);
3.答案
思路一: rank开窗求前两次的消费记录
3.1根据user_id和create_date进行分组,过滤掉同一天多次消费的记录,使用rank开窗以用户进行分区,以时间进行排序的排名
select user_id,
create_date,
rank() over (
partition by
user_id
order by
create_date
) rk
from (
select user_id,
create_date
from order_info
group by user_id,
create_date
) t1;
运行结果
3.2筛选出排名小于等于2的记录,max、min得出第一次、第二次消费的日期
select user_id,
max(create_date) second_buy,
min(create_date) first_buy
from (
select user_id,
create_date,
rank() over (
partition by
user_id
order by
create_date
) rk
from (
select user_id,
create_date
from order_info
group by user_id,
create_date
) t1
) t2
where rk <= 2
group by user_id;
运行结果
3.3使用sum(if(datediff(),1,0)),datediff求两个日期的差值,如果差值为1,返回1,否则返回0,sum进行数值求和,得出满足条件的用户数,得出的结果除以总的用户数得出结果,再对其进行一个格式的转换就可以完成上述需求
select concat(
round(
sum(if(datediff(second_buy, first_buy) = 1, 1, 0)) / count(user_id) * 100,
1
),
"%"
) percentage
from (
select user_id,
max(create_date) second_buy,
min(create_date) first_buy
from (
select user_id,
create_date,
rank() over (
partition by
user_id
order by
create_date
) rk
from (
select user_id,
create_date
from order_info
group by user_id,
create_date
) t1
) t2
where rk <= 2
group by user_id
) t3;
运行结果
思路二:使用first_value()开窗,求第一次的消费记录,然后同样datediff()做差
3.1 根据user_id和create_date进行分组,过滤掉同一天多次消费的记录,使用first_value()开窗得出第一次的消费日期
select user_id,
create_date,
first_value(create_date, false) over (
partition by
user_id
order by
create_date
) first_create_date
from (
select user_id,
create_date
from order_info
group by user_id,
create_date
) t1;
运行结果
3.2 使用sum(if(datediff(),1,0)),datediff求两个日期的差值,如果差值为1,返回1,否则返回0,sum进行数值求和,得出满足条件的用户数,得出的结果除以总的用户数得出结果,再对其进行一个格式的转换就可以完成上述需求
select concat(
round(
sum(if(datediff(create_date, first_create_date) = 1, 1, 0)) / count(distinct user_id) *
100,
1
),
"%"
) percentage
from (
select user_id,
create_date,
first_value(create_date, false) over (
partition by
user_id
order by
create_date
) first_create_date
from (
select user_id,
create_date
from order_info
group by user_id,
create_date
) t1
) t2;
运行结果