背景: 业务中,遇到这样一个需求:需要判断在前一个节点业务操作后(取前一节点的操作时间),判断后一节点操作是否在3个小时内完成,其中晚上17:00到第二天早上8:00不纳入计算范围。
方法一:
思路:首先判断前一节点操作时间是否临近剔除范围,即使用hour
函数判断小时+3是否在17:00到第二天早上8:00范围。如果在,后一节点时间-前一节点时间-13进行判断,如果不在用后一节点时间-前一节点时间进行判断。
select
*
from
tableName
where if((hour(前一节点时间)+3>19 or hour(前一节点时间)+3<8),unix_timestamp(后一节点时间)-unix_timestamp(前一节点时间)-13*60*60,unix_timestamp(后一节点时间)-unix_timestamp(前一节点时间)) > 3*60*60
弊端:逻辑有漏洞,针对开始时间与结束时间差在一天内的效果比较好。
方法二:
思路:对开始时间和结束时间进行一步处理,再计算时间差。如果开始时间在剔除起点之前就用剔除起点替代,如果开始时间在剔除终点之后就用后一天的剔除起点替代;如果结束时间在剔除终点之后就用剔除终点替代,如果结束时间在剔除起点之前就用前一天剔除终点替代。最终时间差用处理后结束时间-处理后开始时间-相差天数*(剔除终点-剔除起点)
优势:计算准确,而且可以剔除法定节假日,但是计算量比较大,而且如果开始时间和结束时间都在假期内可能会出现负数。
-- 日期处理表
-- 法定节假日对应最近工作日
create table dm.dm_legal_holiday_next_workday_mf(
date_id string comment '节假日日期',
start_date string comment '节假日开始日期',
next_workday string comment '下一法定工作日'
)comment '法定节假日对应最近工作日'
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
TBLPROPERTIES('parquet.compression'='SNAPPY')
insert overwrite table dm.dm_legal_holiday_next_workday_mf
select
datestr,
null,
next_date
from(
select
datestr,
if(weekid = 6,'Y',legal_workday_flag) as legal_workday_flag,
lead(legal_workday_flag) over(order by date_id) as next_flag,
lead(datestr) over(order by date_id) as next_date
from
dm.dim_date
where legal_workday_flag is not null
)t
where legal_workday_flag = 'N'
and next_flag = 'Y'
select * from dm.dim_date where legal_workday_flag is not null
insert overwrite table dm.dm_legal_holiday_next_workday_mf
select
t1.datestr,
null,
coalesce(t2.next_workday,lead(t2.next_workday) over(order by t1.datestr),lead(t2.next_workday,2) over(order by t1.datestr),lead(t2.next_workday,3) over(order by t1.datestr),lead(t2.next_workday,4) over(order by t1.datestr),lead(t2.next_workday,5) over(order by t1.datestr),lead(t2.next_workday,6) over(order by t1.datestr),lead(t2.next_workday,7) over(order by t1.datestr),lead(t2.next_workday,8) over(order by t1.datestr),lead(t2.next_workday,9) over(order by t1.datestr),lead(t2.next_workday,10) over(order by t1.datestr)) as next_workday
from(
select
datestr
from
dm.dim_date
where if(weekid = 6,'Y',legal_workday_flag) = 'N'
)t1
left join
dm.dm_legal_holiday_next_workday_mf t2
on t1.datestr = t2.date_id
insert overwrite table dm.dm_legal_holiday_next_workday_mf
select
t1.date_id,
t2.start_date,
t1.next_workday
from
dm.dm_legal_holiday_next_workday_mf t1
left join(
select
next_workday,
min(date_id) as start_date
from
dm.dm_legal_holiday_next_workday_mf
group by next_workday
)t2
on t1.next_workday = t2.next_workday
-- 剔除17:00-次日9:00
select
*
,(unix_timestamp(end_time_handle)-unix_timestamp(start_time_handle)-datediff(end_time_handle,start_time_handle)*16*60*60)/3600 as diff_hour
from(
select
t.*
,case
when t1.date_id is not null then concat(t1.next_workday,' ','09:00:00')
when date_format(start_time,'HH:mm') < '09:00' then concat(substr(start_time,1,10),' ','09:00:00')
when date_format(start_time,'HH:mm') > '17:00' then concat(date_add(substr(start_time,1,10),1),' ','09:00:00')
else start_time
end as start_time_handle
,case
when t2.date_id is not null then concat(t2.start_date,' ','17:00:00')
when date_format(end_time,'HH:mm') < '09:00' then concat(date_sub(substr(end_time,1,10),1),' ','17:00:00')
when date_format(end_time,'HH:mm') > '17:00' then concat(substr(end_time,1,10),' ','17:00:00')
else end_time
end as end_time_handle
from(
select '2024-01-01 08:00:00' as start_time,'2024-01-03 21:00:00' as end_time
union all
select '2024-01-01 10:20:00' as start_time,'2024-01-01 15:00:21' as end_time
union all
select '2024-02-01 19:00:00' as start_time,'2024-02-03 12:21:00' as end_time
union all
select '2024-02-01 09:10:00' as start_time,'2024-02-01 12:21:00' as end_time
union all
select '2024-02-01 14:10:00' as start_time,'2024-02-02 12:21:00' as end_time
union all
select '2024-02-01 19:10:00' as start_time,'2024-02-01 21:21:00' as end_time
union all
select '2024-02-01 19:10:00' as start_time,'2024-02-02 03:21:00' as end_time
union all
select '2024-02-01 08:10:00' as start_time,'2024-02-01 16:59:00' as end_time
)t
left join
dm.dm_legal_holiday_next_workday_mf t1
on substr(t.start_time,1,10) = t1.date_id
left join
dm.dm_legal_holiday_next_workday_mf t2
on substr(t.end_time,1,10) = t2.date_id
)tt