hive中判断A表时间字段是否在B表的两个时间字段中及求订单中间休息时间

问题:在hive中,A表中有一个时间的字段,类型位string,格式为2018-12-0 13:34:12;在B表中有字段start_time和end_time,类型为string,格式为2018-12-06 13:34:12,先需要将两表按id关联后新增一个标记字段(当A表的时间在B表的start_time和end_time之间就标记为1,不在区间内就标记为0),B表中还有一个订单id,string类型

解决方法:使用join和case when then和lead窗口函数
先在hive中创建两张表并插入模拟数据:

-- 创建表
create table A (
plate_num string,
time string
);
create table B (
order_id string,
plate_num string,
start_time string,
end_time string
);

-- 插入模拟数据
insert into A values('泸A88888', '2018-04-02 09:34:12');
insert into A values('泸A66666', '2018-04-02 12:34:12');
insert into A values('泸A00000', '2018-04-02 10:34:12');
insert into A values('泸A22222', '2018-04-02 15:34:12');
insert into B values('201812060001', '泸A88888', '2018-04-02 09:00:12', '2018-04-02 10:00:12');
insert into B values('201812060002', '泸A66666', '2018-04-02 09:10:12', '2018-04-02 10:00:12');
insert into B values('201812060003', '泸A99999', '2018-04-02 09:15:12', '2018-04-02 09:55:12');
insert into B values('201812060004', '泸A88888', '2018-04-02 10:05:12', '2018-04-02 10:30:12');
insert into B values('201812060005', '泸A22222', '2018-04-02 10:10:12', '2018-04-02 10:44:12');
insert into B values('201812060006', '泸A99999', '2018-04-02 10:15:12', '2018-04-02 10:34:12');
insert into B values('201812060007', '泸A66666', '2018-04-02 10:20:12', '2018-04-02 10:30:12');
insert into B values('201812060008', '泸A66666', '2018-04-02 10:35:12', '2018-04-02 10:40:12');
insert into B values('201812060009', '泸A00000', '2018-04-02 10:35:12', '2018-04-02 10:45:12');
insert into B values('201812060010', '泸A99999', '2018-04-02 10:36:12', '2018-04-02 10:45:12');
insert into B values('201812060011', '泸A22222', '2018-04-02 10:50:12', '2018-04-02 11:20:12');

使用case when then 求表A的时间是否在表B的开始和结束时间之间,在标记为1,不在标记为0

-- 查找结果
-- 方法一:不将时间字段转换为时间戳
select 
A.plate_num plate_num, 
A.time time, 
B.start_time start_time, 
B.end_time end_time,
case when unix_timestamp(A.time) between unix_timestamp(B.start_time) and unix_timestamp(B.end_time) then 1 else 0 end sign
from 
A
join
B
on
A.plate_num = B.plate_num;

-- 结果:
OK
泸A88888	2018-04-02 09:34:12	2018-04-02 09:00:12	2018-04-02 10:00:12	1
泸A22222	2018-04-02 15:34:12	2018-04-02 10:50:12	2018-04-02 11:20:12	0
泸A88888	2018-04-02 09:34:12	2018-04-02 10:05:12	2018-04-02 10:30:12	0
泸A22222	2018-04-02 15:34:12	2018-04-02 10:10:12	2018-04-02 10:44:12	0
泸A00000	2018-04-02 10:34:12	2018-04-02 10:35:12	2018-04-02 10:45:12	0
泸A66666	2018-04-02 12:34:12	2018-04-02 09:10:12	2018-04-02 10:00:12	0
泸A66666	2018-04-02 12:34:12	2018-04-02 10:20:12	2018-04-02 10:30:12	0
泸A66666	2018-04-02 12:34:12	2018-04-02 10:35:12	2018-04-02 10:40:12	0
Time taken: 9.907 seconds, Fetched: 8 row(s)


-- 方法二:将时间字段转换为时间戳
select 
A.plate_num plate_num, 
A.time time, 
B.start_time start_time, 
B.end_time end_time,
case when A.time between B.start_time and B.end_time then 1 else 0 end sign
from 
A
join
B
on
A.plate_num = B.plate_num;

-- 结果:  
OK
泸A88888	2018-04-02 09:34:12	2018-04-02 09:00:12	2018-04-02 10:00:12	1
泸A22222	2018-04-02 15:34:12	2018-04-02 10:50:12	2018-04-02 11:20:12	0
泸A88888	2018-04-02 09:34:12	2018-04-02 10:05:12	2018-04-02 10:30:12	0
泸A22222	2018-04-02 15:34:12	2018-04-02 10:10:12	2018-04-02 10:44:12	0
泸A00000	2018-04-02 10:34:12	2018-04-02 10:35:12	2018-04-02 10:45:12	0
泸A66666	2018-04-02 12:34:12	2018-04-02 09:10:12	2018-04-02 10:00:12	0
泸A66666	2018-04-02 12:34:12	2018-04-02 10:20:12	2018-04-02 10:30:12	0
泸A66666	2018-04-02 12:34:12	2018-04-02 10:35:12	2018-04-02 10:40:12	0
Time taken: 0.985 seconds, Fetched: 8 row(s)

求订单中间的休息时间,使用lead的窗口函数

select 
t.order_id, t.plate_num, t.start_time, t.end_time,
(unix_timestamp(t.start_time1) - unix_timestamp(t.end_time)) as interval_time
from(
select
*,
-- 根据车牌号分区,开始时间升序取后一个订单的开始时间
lead(start_time, 1, 0) over(partition by plate_num order by start_time) start_time1
from b) t;

-- 结果
OK
201812060009	泸A00000	2018-04-02 10:35:12	2018-04-02 10:45:12	NULL
201812060005	泸A22222	2018-04-02 10:10:12	2018-04-02 10:44:12	360
201812060011	泸A22222	2018-04-02 10:50:12	2018-04-02 11:20:12	NULL
201812060002	泸A66666	2018-04-02 09:10:12	2018-04-02 10:00:12	1200
201812060007	泸A66666	2018-04-02 10:20:12	2018-04-02 10:30:12	300
201812060008	泸A66666	2018-04-02 10:35:12	2018-04-02 10:40:12	NULL
201812060001	泸A88888	2018-04-02 09:00:12	2018-04-02 10:00:12	300
201812060004	泸A88888	2018-04-02 10:05:12	2018-04-02 10:30:12	NULL
201812060003	泸A99999	2018-04-02 09:15:12	2018-04-02 09:55:12	1200
201812060006	泸A99999	2018-04-02 10:15:12	2018-04-02 10:34:12	120
201812060010	泸A99999	2018-04-02 10:36:12	2018-04-02 10:45:12	NULL
Time taken: 9.289 seconds, Fetched: 11 row(s)

  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值