问题:在hive中,A表中有一个时间的字段,类型位string,格式为2018-12-0 13:34:12;在B表中有字段start_time和end_time,类型为string,格式为2018-12-06 13:34:12,先需要将两表按id关联后新增一个标记字段(当A表的时间在B表的start_time和end_time之间就标记为1,不在区间内就标记为0),B表中还有一个订单id,string类型
解决方法:使用join和case when then和lead窗口函数
先在hive中创建两张表并插入模拟数据:
-- 创建表
create table A (
plate_num string,
time string
);
create table B (
order_id string,
plate_num string,
start_time string,
end_time string
);
-- 插入模拟数据
insert into A values('泸A88888', '2018-04-02 09:34:12');
insert into A values('泸A66666', '2018-04-02 12:34:12');
insert into A values('泸A00000', '2018-04-02 10:34:12');
insert into A values('泸A22222', '2018-04-02 15:34:12');
insert into B values('201812060001', '泸A88888', '2018-04-02 09:00:12', '2018-04-02 10:00:12');
insert into B values('201812060002', '泸A66666', '2018-04-02 09:10:12', '2018-04-02 10:00:12');
insert into B values('201812060003', '泸A99999', '2018-04-02 09:15:12', '2018-04-02 09:55:12');
insert into B values('201812060004', '泸A88888', '2018-04-02 10:05:12', '2018-04-02 10:30:12');
insert into B values('201812060005', '泸A22222', '2018-04-02 10:10:12', '2018-04-02 10:44:12');
insert into B values('201812060006', '泸A99999', '2018-04-02 10:15:12', '2018-04-02 10:34:12');
insert into B values('201812060007', '泸A66666', '2018-04-02 10:20:12', '2018-04-02 10:30:12');
insert into B values('201812060008', '泸A66666', '2018-04-02 10:35:12', '2018-04-02 10:40:12');
insert into B values('201812060009', '泸A00000', '2018-04-02 10:35:12', '2018-04-02 10:45:12');
insert into B values('201812060010', '泸A99999', '2018-04-02 10:36:12', '2018-04-02 10:45:12');
insert into B values('201812060011', '泸A22222', '2018-04-02 10:50:12', '2018-04-02 11:20:12');
使用case when then 求表A的时间是否在表B的开始和结束时间之间,在标记为1,不在标记为0
-- 查找结果
-- 方法一:不将时间字段转换为时间戳
select
A.plate_num plate_num,
A.time time,
B.start_time start_time,
B.end_time end_time,
case when unix_timestamp(A.time) between unix_timestamp(B.start_time) and unix_timestamp(B.end_time) then 1 else 0 end sign
from
A
join
B
on
A.plate_num = B.plate_num;
-- 结果:
OK
泸A88888 2018-04-02 09:34:12 2018-04-02 09:00:12 2018-04-02 10:00:12 1
泸A22222 2018-04-02 15:34:12 2018-04-02 10:50:12 2018-04-02 11:20:12 0
泸A88888 2018-04-02 09:34:12 2018-04-02 10:05:12 2018-04-02 10:30:12 0
泸A22222 2018-04-02 15:34:12 2018-04-02 10:10:12 2018-04-02 10:44:12 0
泸A00000 2018-04-02 10:34:12 2018-04-02 10:35:12 2018-04-02 10:45:12 0
泸A66666 2018-04-02 12:34:12 2018-04-02 09:10:12 2018-04-02 10:00:12 0
泸A66666 2018-04-02 12:34:12 2018-04-02 10:20:12 2018-04-02 10:30:12 0
泸A66666 2018-04-02 12:34:12 2018-04-02 10:35:12 2018-04-02 10:40:12 0
Time taken: 9.907 seconds, Fetched: 8 row(s)
-- 方法二:将时间字段转换为时间戳
select
A.plate_num plate_num,
A.time time,
B.start_time start_time,
B.end_time end_time,
case when A.time between B.start_time and B.end_time then 1 else 0 end sign
from
A
join
B
on
A.plate_num = B.plate_num;
-- 结果:
OK
泸A88888 2018-04-02 09:34:12 2018-04-02 09:00:12 2018-04-02 10:00:12 1
泸A22222 2018-04-02 15:34:12 2018-04-02 10:50:12 2018-04-02 11:20:12 0
泸A88888 2018-04-02 09:34:12 2018-04-02 10:05:12 2018-04-02 10:30:12 0
泸A22222 2018-04-02 15:34:12 2018-04-02 10:10:12 2018-04-02 10:44:12 0
泸A00000 2018-04-02 10:34:12 2018-04-02 10:35:12 2018-04-02 10:45:12 0
泸A66666 2018-04-02 12:34:12 2018-04-02 09:10:12 2018-04-02 10:00:12 0
泸A66666 2018-04-02 12:34:12 2018-04-02 10:20:12 2018-04-02 10:30:12 0
泸A66666 2018-04-02 12:34:12 2018-04-02 10:35:12 2018-04-02 10:40:12 0
Time taken: 0.985 seconds, Fetched: 8 row(s)
求订单中间的休息时间,使用lead的窗口函数
select
t.order_id, t.plate_num, t.start_time, t.end_time,
(unix_timestamp(t.start_time1) - unix_timestamp(t.end_time)) as interval_time
from(
select
*,
-- 根据车牌号分区,开始时间升序取后一个订单的开始时间
lead(start_time, 1, 0) over(partition by plate_num order by start_time) start_time1
from b) t;
-- 结果
OK
201812060009 泸A00000 2018-04-02 10:35:12 2018-04-02 10:45:12 NULL
201812060005 泸A22222 2018-04-02 10:10:12 2018-04-02 10:44:12 360
201812060011 泸A22222 2018-04-02 10:50:12 2018-04-02 11:20:12 NULL
201812060002 泸A66666 2018-04-02 09:10:12 2018-04-02 10:00:12 1200
201812060007 泸A66666 2018-04-02 10:20:12 2018-04-02 10:30:12 300
201812060008 泸A66666 2018-04-02 10:35:12 2018-04-02 10:40:12 NULL
201812060001 泸A88888 2018-04-02 09:00:12 2018-04-02 10:00:12 300
201812060004 泸A88888 2018-04-02 10:05:12 2018-04-02 10:30:12 NULL
201812060003 泸A99999 2018-04-02 09:15:12 2018-04-02 09:55:12 1200
201812060006 泸A99999 2018-04-02 10:15:12 2018-04-02 10:34:12 120
201812060010 泸A99999 2018-04-02 10:36:12 2018-04-02 10:45:12 NULL
Time taken: 9.289 seconds, Fetched: 11 row(s)