🌿挑战100天不停更,刷爆 hive sql🧲
详情请点击🔗我的专栏🖲,共同学习,一起进步~
NUM: 第14天 -时效问题
🧨不废话,刷题~~🧨
🎈表结构
工作日:周一至周五 09:30-18:30
日期表:
客户申请表:
🎉建表
-- 日期表
create table d_date
(
date_id string,
is_work string
);
-- 客户申请表:t14
create table t14
(
a string,
b string,
c string
);
insert into d_date (date_id, is_work)
values ('2017-04-13', '1'),
('2017-04-14', '1'),
('2017-04-15', '0'),
('2017-04-16', '0'),
('2017-04-17', '1');
-- 工 作 日 :周一至周五 09:30-18:30
insert into t14 (a, b, c)
values ('1', '申请', '2017-04-14 18:03:00'),
('1', '通过', '2017-04-17 09:43:00'),
('2', '申请', '2017-04-13 17:02:00'),
('2', '通过', '2017-04-15 09:42:00');
👓计算上表中从申请到通过占用的工作时长
👙输出结果
🎨思路
- 将申请时间和通过时间分组并取出开始时间和截止时间
- 根据tmp1查询结果计算出通过和申请相差的时间戳和天数
- 通过开窗函数和炸裂函数取出排序及时间统计
- 根据查询结果查询 rn 和 ct
- 筛选is_work
- 查询占比
🧨SQL
select a,
-- 6,查询占比
round(sum(diff) / 3600, 2) as d
from (
-- 5,筛选is_work
select a,
apply_time,
pass_time,
dates,
rn,
ct,
is_work,
case
when is_work = 1 and rn = 1 then unix_timestamp(concat(dates, ' 18:30:00'), 'yyyy-MM-dd HH:mm:ss') -
unix_timestamp(apply_time, 'yyyy-MM-dd HH:mm:ss')
when is_work = 0 then 0
when is_work = 1 and rn = ct then unix_timestamp(pass_time, 'yyyy-MM-dd HH:m m:ss') -
unix_timestamp(concat(dates, ' 09:30:00'), 'yyyy-MM-dd HH:mm:ss')
when is_work = 1 and rn != ct then 9 * 3600 end diff
from (
-- 4,根据查询结果查询 rn 和 ct
select a,
apply_time,
pass_time,
time_diff,
day_diff,
rn,
ct,
date_add(start_time, rn - 1) dates
from (
-- 3,通过开窗函数和炸裂函数取出排序及时间统计
select a,
apply_time,
pass_time,
time_diff,
day_diff,
strs,
start_time,
row_number() over (partition by a) as rn,
count(*) over (partition by a) as ct
from (
select a,
apply_time,
pass_time,
time_diff,
day_diff,
-- repeat(字符串,10) 返回10 * 字符串相连接
substr(repeat(concat(substr(apply_time, 1, 10), ','), day_diff + 1), 1,
11 * (day_diff + 1) - 1) strs
from (
--2,根据tmp1查询结果计算出通过和申请相差的时间戳和天数
select a,
apply_time,
pass_time,
unix_timestamp(pass_time, 'yyyy-MM-dd HH:mm:ss') -
unix_timestamp(apply_time, 'yyyy-MM-dd HH:mm:ss') time_diff,
datediff(substr(pass_time, 1, 10), substr(apply_time, 1, 10)) day_diff
from (
-- 1,将申请时间和通过时间分组并取出开始时间和截止时间
select a,
max(case when b = '申请' then c end) apply_time,
max(case when b = '通过' then c end) pass_time
from t14
group by a) tmp1) tmp2
) tmp3 lateral view explode(split(strs, ",")) t as start_time
) tmp4) tmp5
join d_date on tmp5.dates = d_date.date_id) tmp6
group by a;