Douglas-Peucker算法(该算法名字够吓人,其实思想很简单)
在数字化时,要对曲线进行采样,即在曲线上取有限个点,将其变为折线,并且能够在一定程度
上保持原有的形状。
经典的Douglas-Peucker算法步骤如下(如下图):
(1)在曲线首尾两点A,B之间连接一条直线AB,该直线为曲线的弦;
(2)得到曲线上离该直线段距离最大的点C,计算其与AB的距离d;
(3)比较该距离与预先给定的阈值threshold的大小,如果小于threshold,则该直线段作为曲线的近似,该段曲线处理完毕。
(4)如果距离大于阈值,则用C将曲线分为两段AC和BC,并分别对两段取信进行1~3的处理。
(5)当所有曲线都处理完毕时,依次连接各个分割点形成的折线,即可以作为曲线的近似。
这个算法要因地制宜,用于图像处理的contour,提取转折点时,要做两点的改进:
(1)因为这个算法的头尾点肯定是存在的,但是contour的头尾点是相邻像素,因此,最后这个点也要设置为false。
(2)因为这个算法是根据点与弦的距离是否超过threshold,因此,还要检测是否存在同一线段上的多个点,如下图:点1 不是转折点,要移出,还要再做处理。
代码实现:
递归方式实现的Douglas–Peucker算法用于GPS轨迹压缩,参考的原文找不到了,把它封装成一个类。
import math
class Point(object):
def __init__(self, id, x, y):
self.id = id
self.x = x
self.y = y
class DPCompress(object):
def __init__(self, pointList, tolerance):
self.Compressed = list()
self.pointList = pointList
self.tolerance = tolerance
self.runDP(pointList, tolerance)
def calc_height(self, point1, point2, point):
"""
计算point到[point1, point2所在直线]的距离
点到直线距离:
A = point2.y - point1.y;
B = point1.x - point2.x;
C = point2.x * point1.y - point1.x * point2.y
Dist = abs((A * point3.X + B * point3.Y + C) / sqrt(A * A + B * B))
"""
# tops2 = abs(point1.x * point2.y + point2.x * point.y
# + point.x * point1.y - point2.x * point1.y - point.x *
# point2.y - point1.x * point.y)
tops = abs(point1.x * point.y + point2.x * point1.y + point.x * point2.y
- point1.x * point2.y - point2.x * point.y - point.x * point1.y
)
bottom = math.sqrt(
math.pow(point2.y - point1.y, 2) + math.pow(point2.x - point1.x, 2)
)
height = 100 * tops / bottom
return height
def DouglasPeucker(self, pointList, firsPoint, lastPoint, tolerance):
"""
计算通过的内容
DP算法
:param pointList: 点列表
:param firsPoint: 第一个点
:param lastPoint: 最后一个点
:param tolerance: 容差
:return:
"""
maxDistance = 0.0
indexFarthest = 0
for i in range(firsPoint, lastPoint):
distance = self.calc_height(pointList[firsPoint], pointList[lastPoint], pointList[i])
if (distance > maxDistance):
maxDistance = distance
indexFarthest = i
# print('max_dis=', maxDistance)
if maxDistance > tolerance and indexFarthest != 0:
self.Compressed.append(pointList[indexFarthest])
self.DouglasPeucker(pointList, firsPoint, indexFarthest, tolerance)
self.DouglasPeucker(pointList, indexFarthest, lastPoint, tolerance)
def runDP(self, pointList, tolerance):
"""
主要运行结果
:param pointList: Point 列表
:param tolerance: 值越小,压缩后剩余的越多
:return:
"""
if pointList == None or pointList.__len__() < 3:
return pointList
firspoint = 0
lastPoint = len(pointList) - 1
self.Compressed.append(pointList[firspoint])
self.Compressed.append(pointList[lastPoint])
while (pointList[firspoint] == pointList[lastPoint]):
lastPoint -= 1
self.DouglasPeucker(pointList, firspoint, lastPoint, tolerance)
def getCompressed(self):
self.Compressed.sort(key=lambda point: int(point.id))
return self.Compressed
该类的使用方法
import pandas as pd
import numpy as np
import collections
from DPCompress import Point,DPCompress
def load_data(file_path):
columns=['rid','car_id','lon','lat']
df = pd.read_csv(file_path, header=None, names=columns)
df_data = df.loc[df['lon']!=np.nan].reset_index().drop(['index'],axis=1)
car_to_points = collections.defaultdict(list)
for i, row in df_data.iterrows():
row_id = row['rid']
car_id = row['car_id']
lon = float(row['lon'])
lat = float(row['lat'])
pt = Point(row_id, round(lon, 6), round(lat, 6)) # 构造Point对象
car_to_points[car_id].append(pt)
return car_to_points
if __name__ == '__main__':
data_file='data/trajectory.csv'
output_file='data/compressed.csv'
car_points = load_data(data_file)
# print(car_points.keys())
with open(output_file, 'w', encoding='utf-8') as fwriter:
for car, PointList in car_points.items():
points = []
dp = DPCompress(PointList, 3.5)
points = dp.getCompressed()
for p in points:
line = "{},{},{}".format(car, p.x, p.y)
fwriter.write(line)
fwriter.write("\n")
print(car, len(PointList), '-->', len(points))