连续时间段去重

--连续时间段去重,并求关联轨迹

--入参表(小表)
CREATE table public.test1(id int,name varchar,record_time timestamp,place_id int);
--日志记录表(大表)
CREATE table public.test(id int,name varchar,record_time timestamp,place_id int);

INSERT INTO public.test1 (id,name,record_time,place_id) VALUES (
1,'zhangsan','2021-04-27 00:00:00.000',111);
INSERT INTO public.test1 (id,name,record_time,place_id) VALUES (
1,'zhangsan','2021-04-27 00:00:01.000',111);
INSERT INTO public.test1 (id,name,record_time,place_id) VALUES (
1,'zhangsan','2021-04-27 00:00:02.000',112);
INSERT INTO public.test1 (id,name,record_time,place_id) VALUES (
1,'zhangsan','2021-04-27 00:00:03.000',112);
INSERT INTO public.test1 (id,name,record_time,place_id) VALUES (
1,'zhangsan','2021-04-27 00:00:05.000',113);
INSERT INTO public.test1 (id,name,record_time,place_id) VALUES (
1,'zhangsan','2021-04-27 00:00:06.000',113);
INSERT INTO public.test1 (id,name,record_time,place_id) VALUES (
1,'zhangsan','2021-04-27 00:00:07.000',112);
INSERT INTO public.test1 (id,name,record_time,place_id) VALUES (
1,'zhangsan','2021-04-27 00:00:08.000',114);
INSERT INTO public.test1 (id,name,record_time,place_id) VALUES (
1,'zhangsan','2021-04-27 00:00:04.000',111);


INSERT INTO public.test (id,name,record_time,place_id) VALUES (
1,'zhangsan','2021-04-27 00:00:00.000',111);
INSERT INTO public.test (id,name,record_time,place_id) VALUES (
1,'zhangsan','2021-04-27 00:00:01.000',111);
INSERT INTO public.test (id,name,record_time,place_id) VALUES (
1,'zhangsan','2021-04-27 00:00:02.000',112);
INSERT INTO public.test (id,name,record_time,place_id) VALUES (
1,'zhangsan','2021-04-27 00:00:03.000',112);
INSERT INTO public.test (id,name,record_time,place_id) VALUES (
1,'zhangsan','2021-04-27 00:00:05.000',113);
INSERT INTO public.test (id,name,record_time,place_id) VALUES (
1,'zhangsan','2021-04-27 00:00:06.000',113);
INSERT INTO public.test (id,name,record_time,place_id) VALUES (
1,'zhangsan','2021-04-27 00:00:07.000',112);
INSERT INTO public.test (id,name,record_time,place_id) VALUES (
1,'zhangsan','2021-04-27 00:00:08.000',114);
INSERT INTO public.test (id,name,record_time,place_id) VALUES (
2,'lisi','2021-04-27 00:00:00.000',111);
INSERT INTO public.test (id,name,record_time,place_id) VALUES (
2,'lisi','2021-04-27 00:00:01.000',111);
INSERT INTO public.test (id,name,record_time,place_id) VALUES (
2,'lisi','2021-04-27 00:00:02.000',112);
INSERT INTO public.test (id,name,record_time,place_id) VALUES (
2,'lisi','2021-04-27 00:00:03.000',112);
INSERT INTO public.test (id,name,record_time,place_id) VALUES (
2,'lisi','2021-04-27 00:00:04.000',111);
INSERT INTO public.test (id,name,record_time,place_id) VALUES (
2,'lisi','2021-04-27 00:00:05.000',113);
INSERT INTO public.test (id,name,record_time,place_id) VALUES (
2,'lisi','2021-04-27 00:00:06.000',113);
INSERT INTO public.test (id,name,record_time,place_id) VALUES (
2,'lisi','2021-04-27 00:00:07.000',112);
INSERT INTO public.test (id,name,record_time,place_id) VALUES (
2,'lisi','2021-04-27 00:00:08.000',114);
INSERT INTO public.test (id,name,record_time,place_id) VALUES (
2,'lisi','2021-04-27 00:00:09',115);


--生成目标轨迹的数据来源
----每个区域时间连续段(在同一个place_id被抓拍多次,中间没有去其他地方) 只保留一条数据)
with tt as (
SELECT *,
ROW_NUMBER ()OVER (PARTITION by id ORDER by record_time)region_id
from (
SELECT *,
ROW_NUMBER ()OVER (PARTITION by id ORDER by record_time)rn ,
lag(place_id,1)over(PARTITION by id ORDER by record_time)lag_place_id
FROM public.test1
)a where rn=1 or place_id<>lag_place_id 
)
,
--求地理位置区域个数(real_region_total_num)和区域时间连续段个数(virtual_region_total_num)
t_region_num1 as (
select id,name,COUNT( place_id)virtual_region_total_num,
COUNT(DISTINCT place_id)real_region_total_num
from tt group by  id,name
),
table1 as (
SELECT tt.id,tt.name,tt.record_time,tt.place_id,tt.region_id,
t_region_num1.virtual_region_total_num,t_region_num1.real_region_total_num
from tt inner join t_region_num1
on tt.id=t_region_num1.id and tt.name=t_region_num1.name)
--SELECT *from table1
-----------------------------------------------------------
--生成相似轨迹的数据来源
, t as (
SELECT *,
ROW_NUMBER ()OVER (PARTITION by id ORDER by record_time)region_id
from (
SELECT *,
ROW_NUMBER ()OVER (PARTITION by id ORDER by record_time)rn ,
lag(place_id,1)over(PARTITION by id ORDER by record_time)lag_place_id
FROM public.test
)a where rn=1 or place_id<>lag_place_id 
)
,
t_region_num as (
select id,name,COUNT( place_id)virtual_region_total_num,
COUNT(DISTINCT place_id)real_region_total_num
from t group by  id,name
)
--得到每个时间区域段只保留一条数据的人员轨迹
,table2 as (
SELECT t.id,t.name,t.record_time,t.place_id,t.region_id,
t_region_num.virtual_region_total_num,t_region_num.real_region_total_num
from t inner join t_region_num
on t.id=t_region_num.id and t.name=t_region_num.name)
--SELECT *from table2
,res_people as (
select a.id,a.name,a.b_virtual_region_total_num from (
select a.*,b.virtual_region_total_num as b_virtual_region_total_num from table2 a inner join table1 b 
on a.place_id=b.place_id and a.region_id=b.region_id
)a 
--比较关联上的条数 与目标轨迹经过的区域时间连续段个数 是否相等,相等则完全匹配上
group by a.id,a.name,a.b_virtual_region_total_num having count(1) =a.b_virtual_region_total_num)
--关联原表,只保留匹配上的那几条,多余的条数过滤掉 A:1-2-3-4 ,B:1-2-3-4-1 去掉B中的最后那个1那条
select a.id,a.name,a.record_time,a.place_id as 经过的区域ID,a.region_id as 经过各个区域的先后顺序,
a.real_region_total_num as 出现过的地理区域个数,a.virtual_region_total_num as 虚拟停留区域个数,
b.b_virtual_region_total_num as 与目标轨迹吻合的停留区域个数 from table2 a inner join res_people b 
on a.id=b.id and a.name=b.name 
-- A:1-2-3-4 ,B:1-2-3-4-1 去掉B中的最后那个1那条
and a.region_id<=b.b_virtual_region_total_num;

---
--标识每段数据
SELECT *,sum(case when place_id=lag_place_id then 0 else 1 end)over(PARTITION by id ORDER by record_time)flag
from (
SELECT *,ROW_NUMBER ()OVER (PARTITION by id ORDER by record_time)rn ,
lag(place_id,1)over(PARTITION by id ORDER by record_time)lag_place_id from public.test t )a;

--观察多插入一条数据有何不同
INSERT INTO public.test  (id,"name",record_time,place_id,rn,lag_place_id,flag) VALUES 
(1,'zhangsan','2021-04-27 00:00:08',115,9,114,6)
;

SELECT *,sum(case when place_id=lag_place_id then 0 else 1 end)over(PARTITION by id ORDER by record_time)flag
from (
SELECT *,ROW_NUMBER ()OVER (PARTITION by id ORDER by record_time)rn ,
lag(place_id,1)over(PARTITION by id ORDER by record_time)lag_place_id from public.test t )a;

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值