数据如下
+---+-------------------+-------------------+----+
| id| start_time| end_time|flow|
+---+-------------------+-------------------+----+
| 1|2020-02-18 14:20:30|2020-02-18 14:46:30| 20|
| 1|2020-02-18 14:47:20|2020-02-18 15:20:30| 30|
| 1|2020-02-18 15:37:23|2020-02-18 16:05:26| 40|
| 1|2020-02-18 16:06:27|2020-02-18 17:20:49| 50|
| 1|2020-02-18 17:21:50|2020-02-18 18:03:27| 60|
| 2|2020-02-18 14:18:24|2020-02-18 15:01:40| 20|
| 2|2020-02-18 15:20:49|2020-02-18 15:30:24| 30|
| 2|2020-02-18 16:01:23|2020-02-18 16:40:32| 40|
| 2|2020-02-18 16:44:56|2020-02-18 17:40:52| 50|
| 3|2020-02-18 14:39:58|2020-02-18 15:35:53| 20|
| 3|2020-02-18 15:36:39|2020-02-18 15:24:54| 30|
+---+-------------------+-------------------+----+
select
uid,
min(start_time) start_time,
max(end_time) end_time,
sum(flow) flow
from
(
select
uid,
start_time,
end_time,
flow,
sum(flag) over(partition by uid order by start_time) sum_flag
from
(
select
uid,
start_time,
end_time,
flow,
if((to_unix_timestamp(start_time) - to_unix_timestamp(lag_time))/60 > 10, 1,0) flag -- 将日期转为时间戳,秒为单位
from
(
select
uid,
start_time,
end_time,
flow,
lag(end_time,1,start_time) over(partition by uid order by start_time) lag_time
from
tb_flowCount
)
)
)
group by uid,sum_flag
结果:
+---+-------------------+-------------------+---------+
|uid| start_time| end_time| flow|
+---+-------------------+-------------------+---------+
| 1|2020-02-18 14:20:30|2020-02-18 15:20:30| 50|
| 1|2020-02-18 15:37:23|2020-02-18 18:03:27| 150|
| 3|2020-02-18 14:39:58|2020-02-18 15:35:53| 50|
| 2|2020-02-18 14:18:24|2020-02-18 15:01:40| 20|
| 2|2020-02-18 15:20:49|2020-02-18 15:30:24| 30|
| 2|2020-02-18 16:01:23|2020-02-18 17:40:52| 90|
+---+-------------------+-------------------+---------+
过程解析
1
select
uid,
start_time,
end_time,
flow,
lag(end_time,1,start_time) over(partition by uid order by start_time) lag_time --将上一行的结束时间压到下一行
from
tb_flowCount
+---+-------------------+-------------------+----+----------------------+
|uid| start_time| end_time|flow| lag_time|
+---+-------------------+-------------------+----+----------------------+
| 1|2020-02-18 14:20:30|2020-02-18 14:46:30| 20| 2020-02-18 14:20:30|
| 1|2020-02-18 14:47:20|2020-02-18 15:20:30| 30| 2020-02-18 14:46:30|
| 1|2020-02-18 15:37:23|2020-02-18 16:05:26| 40| 2020-02-18 15:20:30|
| 1|2020-02-18 16:06:27|2020-02-18 17:20:49| 50| 2020-02-18 16:05:26|
| 1|2020-02-18 17:21:50|2020-02-18 18:03:27| 60| 2020-02-18 17:20:49|
| 3|2020-02-18 14:39:58|2020-02-18 15:35:53| 20| 2020-02-18 14:39:58|
| 3|2020-02-18 15:36:39|2020-02-18 15:24:54| 30| 2020-02-18 15:35:53|
| 2|2020-02-18 14:18:24|2020-02-18 15:01:40| 20| 2020-02-18 14:18:24|
| 2|2020-02-18 15:20:49|2020-02-18 15:30:24| 30| 2020-02-18 15:01:40|
| 2|2020-02-18 16:01:23|2020-02-18 16:40:32| 40| 2020-02-18 15:30:24|
| 2|2020-02-18 16:44:56|2020-02-18 17:40:52| 50| 2020-02-18 16:40:32|
+---+-------------------+-------------------+----+----------------------+
2
select
uid,
start_time,
end_time,
flow,
if((to_unix_timestamp(start_time) - to_unix_timestamp(lag_time))/60 > 10, 1,0) flag -- 将日期转为时间戳,秒为单位
from
(
select
uid,
start_time,
end_time,
flow,
lag(end_time,1,start_time) over(partition by uid order by start_time) lag_time
from
tb_flowCount
)
--
+---+-------------------+-------------------+----+----+
|uid| start_time| end_time|flow|flag|
+---+-------------------+-------------------+----+----+
| 1|2020-02-18 14:20:30|2020-02-18 14:46:30| 20| 0|
| 1|2020-02-18 14:47:20|2020-02-18 15:20:30| 30| 0|
| 1|2020-02-18 15:37:23|2020-02-18 16:05:26| 40| 1|
| 1|2020-02-18 16:06:27|2020-02-18 17:20:49| 50| 0|
| 1|2020-02-18 17:21:50|2020-02-18 18:03:27| 60| 0|
| 3|2020-02-18 14:39:58|2020-02-18 15:35:53| 20| 0|
| 3|2020-02-18 15:36:39|2020-02-18 15:24:54| 30| 0|
| 2|2020-02-18 14:18:24|2020-02-18 15:01:40| 20| 0|
| 2|2020-02-18 15:20:49|2020-02-18 15:30:24| 30| 1|
| 2|2020-02-18 16:01:23|2020-02-18 16:40:32| 40| 1|
| 2|2020-02-18 16:44:56|2020-02-18 17:40:52| 50| 0|
+---+-------------------+-------------------+----+----+
3
select
uid,
start_time,
end_time,
flow,
sum(flag) over(partition by uid order by start_time) sum_flag
from
(
select
uid,
start_time,
end_time,
flow,
if((to_unix_timestamp(start_time) - to_unix_timestamp(lag_time))/60 > 10, 1,0) flag -- 将日期转为时间戳,秒为单位
from
(
select
uid,
start_time,
end_time,
flow,
lag(end_time,1,start_time) over(partition by uid order by start_time) lag_time
from
tb_flowCount
)
)
--+---+-------------------+-------------------+----+--------+
|uid| start_time| end_time|flow|sum_flag|
+---+-------------------+-------------------+----+--------+
| 1|2020-02-18 14:20:30|2020-02-18 14:46:30| 20| 0|
| 1|2020-02-18 14:47:20|2020-02-18 15:20:30| 30| 0|
| 1|2020-02-18 15:37:23|2020-02-18 16:05:26| 40| 1|
| 1|2020-02-18 16:06:27|2020-02-18 17:20:49| 50| 1|
| 1|2020-02-18 17:21:50|2020-02-18 18:03:27| 60| 1|
| 3|2020-02-18 14:39:58|2020-02-18 15:35:53| 20| 0|
| 3|2020-02-18 15:36:39|2020-02-18 15:24:54| 30| 0|
| 2|2020-02-18 14:18:24|2020-02-18 15:01:40| 20| 0|
| 2|2020-02-18 15:20:49|2020-02-18 15:30:24| 30| 1|
| 2|2020-02-18 16:01:23|2020-02-18 16:40:32| 40| 2|
| 2|2020-02-18 16:44:56|2020-02-18 17:40:52| 50| 2|
+---+-------------------+-------------------+----+--------+
4
select
uid,
min(start_time) start_time,
max(end_time) end_time,
sum(flow) flow
from
(
select
uid,
start_time,
end_time,
flow,
sum(flag) over(partition by uid order by start_time) sum_flag
from
(
select
uid,
start_time,
end_time,
flow,
if((to_unix_timestamp(start_time) - to_unix_timestamp(lag_time))/60 > 10, 1,0) flag -- 将日期转为时间戳,秒为单位
from
(
select
uid,
start_time,
end_time,
flow,
lag(end_time,1,start_time) over(partition by uid order by start_time) lag_time
from
tb_flowCount
)
)
)
group by uid,sum_flag
--
+---+-------------------+-------------------+---------+
|uid| start_time| end_time| flow|
+---+-------------------+-------------------+---------+
| 1|2020-02-18 14:20:30|2020-02-18 15:20:30| 50|
| 1|2020-02-18 15:37:23|2020-02-18 18:03:27| 150|
| 3|2020-02-18 14:39:58|2020-02-18 15:35:53| 50|
| 2|2020-02-18 14:18:24|2020-02-18 15:01:40| 20|
| 2|2020-02-18 15:20:49|2020-02-18 15:30:24| 30|
| 2|2020-02-18 16:01:23|2020-02-18 17:40:52| 90|
+---+-------------------+-------------------+---------+