Hive计算时间差剔除一天中某时间段

背景: 业务中,遇到这样一个需求:需要判断在前一个节点业务操作后(取前一节点的操作时间),判断后一节点操作是否在3个小时内完成,其中晚上17:00到第二天早上8:00不纳入计算范围。

方法一:

思路:首先判断前一节点操作时间是否临近剔除范围,即使用hour函数判断小时+3是否在17:00到第二天早上8:00范围。如果在,后一节点时间-前一节点时间-13进行判断,如果不在用后一节点时间-前一节点时间进行判断。

select
	*
from 
	tableName
where if((hour(前一节点时间)+3>19 or hour(前一节点时间)+3<8),unix_timestamp(后一节点时间)-unix_timestamp(前一节点时间)-13*60*60,unix_timestamp(后一节点时间)-unix_timestamp(前一节点时间)) > 3*60*60

弊端:逻辑有漏洞,针对开始时间与结束时间差在一天内的效果比较好。

方法二:

思路:对开始时间和结束时间进行一步处理,再计算时间差。如果开始时间在剔除起点之前就用剔除起点替代,如果开始时间在剔除终点之后就用后一天的剔除起点替代;如果结束时间在剔除终点之后就用剔除终点替代,如果结束时间在剔除起点之前就用前一天剔除终点替代。最终时间差用处理后结束时间-处理后开始时间-相差天数*(剔除终点-剔除起点)
优势:计算准确,而且可以剔除法定节假日,但是计算量比较大,而且如果开始时间和结束时间都在假期内可能会出现负数。

-- 日期处理表
-- 法定节假日对应最近工作日
create table dm.dm_legal_holiday_next_workday_mf(
	date_id string comment '节假日日期',
	start_date string comment '节假日开始日期',
	next_workday string comment '下一法定工作日'
)comment '法定节假日对应最近工作日'
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
TBLPROPERTIES('parquet.compression'='SNAPPY')


insert overwrite table dm.dm_legal_holiday_next_workday_mf
select 
	datestr,
	null,
	next_date
from(
select
	datestr,
	if(weekid = 6,'Y',legal_workday_flag) as legal_workday_flag,
	lead(legal_workday_flag) over(order by date_id) as next_flag,
	lead(datestr) over(order by date_id) as next_date
from 
	dm.dim_date
where legal_workday_flag is not null
)t
where legal_workday_flag = 'N'
and next_flag = 'Y'

select * from dm.dim_date where legal_workday_flag is not null

insert overwrite table dm.dm_legal_holiday_next_workday_mf
select 
	t1.datestr,
	null,
	coalesce(t2.next_workday,lead(t2.next_workday) over(order by t1.datestr),lead(t2.next_workday,2) over(order by t1.datestr),lead(t2.next_workday,3) over(order by t1.datestr),lead(t2.next_workday,4) over(order by t1.datestr),lead(t2.next_workday,5) over(order by t1.datestr),lead(t2.next_workday,6) over(order by t1.datestr),lead(t2.next_workday,7) over(order by t1.datestr),lead(t2.next_workday,8) over(order by t1.datestr),lead(t2.next_workday,9) over(order by t1.datestr),lead(t2.next_workday,10) over(order by t1.datestr)) as next_workday
from(
select 
	datestr
from 
	dm.dim_date
where if(weekid = 6,'Y',legal_workday_flag) = 'N'
)t1
left join
	dm.dm_legal_holiday_next_workday_mf t2
on t1.datestr = t2.date_id

insert overwrite table dm.dm_legal_holiday_next_workday_mf
select
	t1.date_id,
	t2.start_date,
	t1.next_workday
from 
	dm.dm_legal_holiday_next_workday_mf t1
left join(
select
	next_workday,
	min(date_id) as start_date
from
	dm.dm_legal_holiday_next_workday_mf
group by next_workday
)t2
on t1.next_workday = t2.next_workday
-- 剔除17:00-次日9:00
select
	*
	,(unix_timestamp(end_time_handle)-unix_timestamp(start_time_handle)-datediff(end_time_handle,start_time_handle)*16*60*60)/3600 as diff_hour
from(
select
	t.*
	,case 
		when t1.date_id is not null then concat(t1.next_workday,' ','09:00:00')
		when date_format(start_time,'HH:mm') < '09:00' then concat(substr(start_time,1,10),' ','09:00:00')
		when date_format(start_time,'HH:mm') > '17:00' then concat(date_add(substr(start_time,1,10),1),' ','09:00:00')
		else start_time
	end as start_time_handle
	,case 
		when t2.date_id is not null then concat(t2.start_date,' ','17:00:00')
		when date_format(end_time,'HH:mm') < '09:00' then concat(date_sub(substr(end_time,1,10),1),' ','17:00:00')
		when date_format(end_time,'HH:mm') > '17:00' then concat(substr(end_time,1,10),' ','17:00:00')
		else end_time
	end as end_time_handle
from(
select '2024-01-01 08:00:00' as start_time,'2024-01-03 21:00:00' as end_time
union all
select '2024-01-01 10:20:00' as start_time,'2024-01-01 15:00:21' as end_time
union all
select '2024-02-01 19:00:00' as start_time,'2024-02-03 12:21:00' as end_time
union all
select '2024-02-01 09:10:00' as start_time,'2024-02-01 12:21:00' as end_time
union all
select '2024-02-01 14:10:00' as start_time,'2024-02-02 12:21:00' as end_time
union all
select '2024-02-01 19:10:00' as start_time,'2024-02-01 21:21:00' as end_time
union all
select '2024-02-01 19:10:00' as start_time,'2024-02-02 03:21:00' as end_time
union all
select '2024-02-01 08:10:00' as start_time,'2024-02-01 16:59:00' as end_time
)t
left join
	dm.dm_legal_holiday_next_workday_mf t1
on substr(t.start_time,1,10) = t1.date_id
left join
	dm.dm_legal_holiday_next_workday_mf t2
on substr(t.end_time,1,10) = t2.date_id
)tt

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值