# 时空数据的DP压缩算法
时空数据的轨迹压缩(DP)
由于船舶AIS数据十分冗杂,为了降低数据挖掘的难度并展现清晰的轨迹,我们采用DP(Douglas-Peucker)算法对轨迹进行压缩。
距离转化公式
下面的代码使用 Haversine 公式,由两点的经纬度(单位:度)计算它们之间的球面距离(单位:米)。
def Distance(lon_1, lat_1, lon_2, lat_2):
    """Return the great-circle distance between two longitude/latitude points.

    The haversine formula is evaluated on a sphere of radius 6378137
    (the Earth radius used by the original code, in metres), so the result
    is expressed in the same unit as that radius.

    :param lon_1: longitude of the first point, in degrees
    :param lat_1: latitude of the first point, in degrees
    :param lon_2: longitude of the second point, in degrees
    :param lat_2: latitude of the second point, in degrees
    :return: non-negative distance along the sphere's surface
    """
    # Imported locally because no module-level import of math is visible
    # in this file.
    import math

    earth_radius = 6378137  # Earth radius

    phi_1 = math.radians(lat_1)
    phi_2 = math.radians(lat_2)
    half_d_lat = (phi_1 - phi_2) / 2
    half_d_lon = math.radians(lon_1 - lon_2) / 2

    haversine = (
        math.sin(half_d_lat) ** 2
        + math.cos(phi_1) * math.cos(phi_2) * math.sin(half_d_lon) ** 2
    )
    # Rounding can push the term marginally above 1 for (near-)antipodal
    # points, which would make asin raise ValueError; clamp it.
    haversine = min(1.0, haversine)

    return 2 * earth_radius * math.asin(math.sqrt(haversine))
得到垂直距离
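使用海伦公式:把起点、终点和当前点看作一个三角形,设起点到终点的距离为 $a$,起点到当前点的距离为 $b$,终点到当前点的距离为 $c$,半周长 $p = \frac{a+b+c}{2}$,则三角形面积为 $S = \sqrt{p(p-a)(p-b)(p-c)}$,当前点到弦(起点与终点的连线)的垂直距离为 $h = \frac{2S}{a}$。
下面是对应的代码: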
def get_vertical_dist(df, start_index, end_index, index):
    """Return the perpendicular distance from one point to a chord.

    The chord joins the rows at ``start_index`` and ``end_index``; the
    distance is obtained from the triangle area (Heron's formula) as
    ``2 * area / chord_length``. All side lengths come from ``Distance``.

    :param df: table with ``'lon'`` and ``'lat'`` columns
    :param start_index: row position of the chord's start point
    :param end_index: row position of the chord's end point
    :param index: row position of the point being measured
    :return: distance from the point to the chord; when the chord's two ends
        coincide, the distance from the point to that single location
    """
    # Imported locally because no module-level import of math is visible
    # in this file.
    import math

    # On pandas objects use positional access: after sorting or filtering,
    # the index labels no longer match row positions and df['lon'][i] would
    # read the wrong row. Plain sequences have no .iloc and are indexed as-is.
    lon = getattr(df['lon'], 'iloc', df['lon'])
    lat = getattr(df['lat'], 'iloc', df['lat'])

    start = (lon[start_index], lat[start_index])
    end = (lon[end_index], lat[end_index])
    point = (lon[index], lat[index])

    chord = Distance(*start, *end)           # start -> end
    to_start = Distance(*start, *point)      # start -> point
    if chord == 0:
        # Degenerate chord: both ends coincide, so the distance to the chord
        # is simply the distance to that location.
        return to_start
    to_end = Distance(*end, *point)          # end -> point

    half_perimeter = (chord + to_start + to_end) / 2
    # fabs guards against a tiny negative product caused by rounding when the
    # three points are (almost) collinear.
    area = math.sqrt(math.fabs(
        half_perimeter
        * (half_perimeter - chord)
        * (half_perimeter - to_start)
        * (half_perimeter - to_end)
    ))
    return 2 * area / chord
递归压缩(DP核心)
下面是DP压缩的核心代码:在起点与终点之间找出离弦(首尾连线)最远的点,若该距离不小于阈值 Dmax,则保留该点,并以它为界把轨迹分成两段继续压缩;否则只保留这一段的起点和终点。
def DP_compress(df, output_point_list, Dmax):
    """Compress a trajectory with the Douglas-Peucker algorithm.

    For each segment the interior point farthest from the chord joining the
    segment's two ends is located (via ``get_vertical_dist``). If that
    distance is at least ``Dmax`` the point is kept and both halves, each
    including the split point, are processed in turn; otherwise only the
    segment's end points are kept.

    :param df: pandas DataFrame holding the trajectory in order, with
        ``'lon'`` and ``'lat'`` columns; any other columns are carried along
    :param output_point_list: list that receives the kept rows (each appended
        as ``row.values``), in trajectory order and without duplicates
    :param Dmax: distance threshold; a point is kept when its distance to the
        chord is ``>= Dmax``
    :return: ``output_point_list`` (the same list object that was passed in)
    """
    point_count = len(df)
    if point_count == 0:
        # Nothing to compress; the original indexing would raise here.
        return output_point_list

    # Make labels equal to positions so get_vertical_dist resolves row i as
    # the i-th row even if the caller sorted or filtered the frame.
    df = df.reset_index(drop=True)

    # The first and last points are always key points. A set keeps each
    # position once, so a single-row input is not emitted twice.
    kept_positions = {0, point_count - 1}

    # Explicit stack instead of recursion: a long trajectory cannot exhaust
    # the interpreter's recursion limit, and no sub-frames need copying.
    pending = [(0, point_count - 1)]
    while pending:
        start_index, end_index = pending.pop()
        if end_index - start_index < 2:
            continue  # no interior point to examine

        max_vertical_dist = 0   # largest distance to the chord seen so far
        key_point_index = None  # position of that farthest point
        for index in range(start_index + 1, end_index):
            cur_vertical_dist = get_vertical_dist(
                df, start_index, end_index, index
            )
            if cur_vertical_dist > max_vertical_dist:
                max_vertical_dist = cur_vertical_dist
                key_point_index = index

        # Split only when a point actually deviates from the chord; both
        # halves share the split point as an end point.
        if key_point_index is not None and max_vertical_dist >= Dmax:
            kept_positions.add(key_point_index)
            pending.append((start_index, key_point_index))
            pending.append((key_point_index, end_index))

    for position in sorted(kept_positions):
        output_point_list.append(df.iloc[position].values)
    return output_point_list
主程序
下面是主程序:读取CSV文件,按时间排序后调用 DP_compress 进行压缩(阈值 Dmax=10),并打印压缩后的轨迹点。
if __name__ == '__main__':
    # Imported here because no module-level import of pandas is visible in
    # this file.
    import pandas as pd

    # Raw string: the Windows backslashes stay literal instead of relying on
    # unrecognised escape sequences such as '\p'.
    csv_path = r'D:\python\python_data\轨迹压缩\临时数据.csv'

    # The context manager closes the file handle once the CSV is parsed.
    with open(csv_path) as csv_file:
        raw = pd.read_csv(csv_file)

    # Order the points by time and renumber the rows so that row labels
    # match row positions after the sort.
    df = raw.sort_values(by='time').reset_index(drop=True)

    output_point_list = []
    a = DP_compress(df, output_point_list, Dmax=10)

    # Restore the column names, which are lost because the kept rows are
    # collected as bare value arrays.
    a = pd.DataFrame(a, columns=df.columns)
    print(a)