1.订单配送中,如果期望配送日期和下单日期相同,称为即时订单,如果期望配送日期和下单日期不同,称为计划订单。请从配送信息表(delivery_info)中求出每个用户的首单(用户的第一个订单)中即时订单的比例,保留两位小数,以小数形式显示
需求结果
2.所用到的表和数据
--配送信息表
CREATE TABLE delivery_info
(
`delivery_id` string comment '配送单id',
`order_id` string comment '订单id',
`user_id` string comment '用户id',
`order_date` string comment '下单日期',
`custom_date` string comment '期望配送日期'
) COMMENT '邮寄信息表'
ROW FORMAT DELIMITED FIELDS TERMINATED BY '\t';
--数据装载
insert overwrite table delivery_info
values ('1', '1', '101', '2021-09-27', '2021-09-29'),
('2', '2', '101', '2021-09-28', '2021-09-28'),
('3', '3', '101', '2021-09-29', '2021-09-30'),
('4', '4', '101', '2021-09-30', '2021-10-01'),
('5', '5', '102', '2021-10-01', '2021-10-01'),
('6', '6', '102', '2021-10-01', '2021-10-01'),
('7', '7', '102', '2021-10-01', '2021-10-03'),
('8', '8', '102', '2021-10-02', '2021-10-02'),
('9', '9', '103', '2021-10-02', '2021-10-03'),
('10', '10', '103', '2021-10-02', '2021-10-04'),
('11', '11', '103', '2021-10-02', '2021-10-02'),
('12', '12', '103', '2021-10-03', '2021-10-03'),
('13', '13', '104', '2021-10-03', '2021-10-04'),
('14', '14', '104', '2021-10-03', '2021-10-04'),
('15', '15', '104', '2021-10-03', '2021-10-03'),
('16', '16', '104', '2021-10-03', '2021-10-03'),
('17', '17', '105', '2021-10-04', '2021-10-04'),
('18', '18', '105', '2021-10-04', '2021-10-06'),
('19', '19', '105', '2021-10-04', '2021-10-06'),
('20', '20', '105', '2021-10-04', '2021-10-04'),
('21', '21', '106', '2021-10-04', '2021-10-04'),
('22', '22', '106', '2021-10-05', '2021-10-05'),
('23', '23', '106', '2021-10-05', '2021-10-05'),
('24', '24', '106', '2021-10-05', '2021-10-07'),
('25', '25', '107', '2021-10-05', '2021-10-05'),
('26', '26', '107', '2021-10-05', '2021-10-06'),
('27', '27', '107', '2021-10-06', '2021-10-06'),
('28', '28', '107', '2021-10-06', '2021-10-07'),
('29', '29', '108', '2021-10-06', '2021-10-06'),
('30', '30', '108', '2021-10-06', '2021-10-06'),
('31', '31', '108', '2021-10-07', '2021-10-09'),
('32', '32', '108', '2021-10-07', '2021-10-09'),
('33', '33', '109', '2021-10-07', '2021-10-08'),
('34', '34', '109', '2021-10-07', '2021-10-08'),
('35', '35', '109', '2021-10-08', '2021-10-10'),
('36', '36', '109', '2021-10-08', '2021-10-09'),
('37', '37', '1010', '2021-10-08', '2021-10-10'),
('38', '38', '1010', '2021-10-08', '2021-10-10'),
('39', '39', '1010', '2021-10-08', '2021-10-09'),
('40', '40', '1010', '2021-10-08', '2021-10-09');
3.答案
思路1:不使用rank()
3.1对同一用户同一天的下单记录且期望发货日期进行去重,保证后续rank()开窗结果排名不存在重复的排名
select user_id,
order_date,
custom_date
from delivery_info
group by user_id, order_date, custom_date;
运行结果
3.2rank()开窗,以user_id进行分区,order_date、custom_date进行升序排序,得出rk
select user_id,
order_date,
custom_date,
rank() over (
partition by
user_id
order by
order_date,custom_date
) rk
from (
select user_id,
order_date,
custom_date
from delivery_info
group by user_id, order_date, custom_date) t1;
运行结果
3.3筛选出rk = 1的记录,计算得出即时比率
select cast(
sum(if(order_date = custom_date, 1, 0)) / count(*) as decimal(16, 2)
) percentage
from (
select user_id,
order_date,
custom_date,
rank() over (
partition by
user_id
order by
order_date,custom_date
) rk
from (
select user_id,
order_date,
custom_date
from delivery_info
group by user_id, order_date, custom_date) t1) t2
where rk = 1;
运行结果
思路2:使用row_number()
3.1第一步,使用row_number()开窗,保证每个用户的第一名只有一个
select user_id,
order_date,
custom_date,
row_number() over (
partition by
user_id
order by
order_date,custom_date
) rn
from delivery_info;
运行结果
3.2第二步,筛选出rn = 1的记录,得出比率
select cast(
sum(if(order_date = custom_date, 1, 0)) / count(*) as decimal(16, 2)
) percentage
from (
select user_id,
order_date,
custom_date,
row_number() over (
partition by
user_id
order by
order_date,custom_date
) rn
from delivery_info
) t1
where rn = 1;
运行结果