目的:
将一块或两块不规则的地图图像的边界坐标按照一定顺序排列
步骤
1. 读取地图图片并输出边界坐标:
import cv2
import numpy as np
from matplotlib import pyplot as plt
import re
import math
import pandas as pd
# Read the map picture directly as a single-channel grayscale array.
image_path = './data/to/fig/path.png'
image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
# cv2.imread does not raise for a missing or unreadable file; it returns None,
# which would otherwise only surface as an opaque error inside cv2.threshold.
if image is None:
    raise FileNotFoundError(f"cannot read image: {image_path}")
# Binarise the image; THRESH_OTSU lets OpenCV pick the threshold itself, so
# the explicit threshold argument (0) is ignored.
_, binary_img = cv2.threshold(image, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
# Invert the binary image (0 <-> 255).
inverse_img = cv2.bitwise_not(binary_img)
# Flip the matrix upside down so that the row index grows from the bottom of
# the picture; the row index is later mapped linearly onto latitude.
flipped_ud_edges = np.flipud(inverse_img)
# Find the outer contours only (RETR_EXTERNAL) and keep every boundary pixel
# (CHAIN_APPROX_NONE).  Other approximation modes that could be used here:
# CHAIN_APPROX_SIMPLE, CHAIN_APPROX_TC89_L1, CHAIN_APPROX_TC89_KCOS.
contours, hierarchy = cv2.findContours(flipped_ud_edges, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_NONE)
# Draw all contours (index -1) onto a blank image of the same shape.
contour_image = np.zeros_like(image)
cv2.drawContours(contour_image, contours, -1, (255, 255, 255), 2)
# Show the original image and the contour image side by side, without ticks.
plt.subplot(121)
plt.imshow(image)
plt.title("Original Image")
plt.xticks([])
plt.yticks([])
plt.subplot(122)
plt.imshow(contour_image)
plt.title("Contour Image")
plt.xticks([])
plt.yticks([])
plt.show()
# cv2.imwrite('./data/CZm5y2223borderP1.jpg', edges)
2. 获取轮廓坐标
# Collect the coordinates of every contour as a 2-D array with
# contour.shape[0] rows and contour.shape[2] columns (the middle axis of the
# array returned by cv2.findContours is dropped).
points = [
    contour.reshape(contour.shape[0], contour.shape[2])
    for contour in contours
]
检查轮廓是否正确,可以画出来看一下:
# Visual sanity check: scatter the first contour, column 0 against column 1.
plt.scatter(x=points[0][:, 0], y=points[0][:, 1], s=0.5)
# To flip the vertical axis, enable: plt.gca().invert_yaxis()
plt.show()
3. 定义排序算法(旅行商问题的贪心最近邻近似)
根据需要,从起点出发,每一步都走到尚未访问的最近坐标,从而将坐标按最近距离排序:
def tsp_greedy(coordinates, start_point_index, outlier_factor=5):
    """Order points along a greedy nearest-neighbour tour.

    Starting at ``coordinates[start_point_index]``, the tour repeatedly moves
    to the closest point that has not been visited yet, and is finally closed
    by appending the start point once more.

    Args:
        coordinates: Sequence of 2-D points; each item must unpack as
            ``x, y``.
        start_point_index: Index of the point the tour starts from.
        outlier_factor: The tour stops early, dropping every point not yet
            visited, as soon as the nearest remaining point is farther away
            than ``outlier_factor`` times the mean of all candidate distances
            examined so far.  Pass ``None`` to always visit every point.

    Returns:
        A list with the visited points in tour order, ending with the start
        point again.  An empty list if ``coordinates`` is empty.
    """
    n = len(coordinates)
    if n == 0:
        return []

    visited = [False] * n
    visited[start_point_index] = True
    path = [start_point_index]
    current_city = start_point_index

    # Running sum/count of every candidate distance examined so far; keeping
    # only the aggregate avoids storing O(n^2) individual distances.
    distance_sum = 0.0
    distance_count = 0

    while len(path) < n:
        min_distance = math.inf
        next_city = -1
        # Find the closest city that has not been visited yet.
        for i in range(n):
            if visited[i]:
                continue
            dist = calculate_distance(coordinates[current_city], coordinates[i])
            distance_sum += dist
            distance_count += 1
            if dist < min_distance:
                min_distance = dist
                next_city = i

        if next_city == -1:
            # No comparable candidate (e.g. NaN coordinates): stop the tour.
            break
        # When only a few far-away points are left, discard them instead of
        # jumping across the shape to reach them.
        if (outlier_factor is not None
                and min_distance > outlier_factor * distance_sum / distance_count):
            break

        path.append(next_city)
        visited[next_city] = True
        current_city = next_city

    # Return to the starting city to close the tour.
    path.append(start_point_index)
    # Build the ordered coordinate list.
    return [coordinates[i] for i in path]


def calculate_distance(coord1, coord2):
    """Return the Euclidean distance between two 2-D points."""
    x1, y1 = coord1
    x2, y2 = coord2
    return ((x2 - x1) ** 2 + (y2 - y1) ** 2) ** 0.5
4. 当轮廓数量为2时,找到两个轮廓最近的两个点(定义算法):
def find_closest_coordinates(coords1, coords2):
    """Return the indices of the closest pair of points between two sets.

    Args:
        coords1: Non-empty sequence of points; only the first two components
            of each point are used.
        coords2: Non-empty sequence of points, same layout as ``coords1``.

    Returns:
        A tuple ``(i, j)`` of plain ints such that ``coords1[i]`` and
        ``coords2[j]`` have the smallest Euclidean distance.  On ties the
        first pair in iteration order (``coords1`` outer, ``coords2`` inner)
        wins.

    Raises:
        ValueError: If either sequence is empty.
    """
    if len(coords1) == 0 or len(coords2) == 0:
        raise ValueError("both coordinate sets must contain at least one point")

    # Work in float so squaring cannot overflow a small integer dtype.
    first = np.asarray(coords1, dtype=float)[:, :2]
    second = np.asarray(coords2, dtype=float)[:, :2]

    best_squared = math.inf
    closest_coords_index = None
    for i, point in enumerate(first):
        # Squared distances from this point to every point of the second set;
        # comparing squares gives the same ordering as comparing distances.
        squared = ((second - point) ** 2).sum(axis=1)
        j = int(squared.argmin())  # argmin returns the first minimum
        if squared[j] < best_squared:
            best_squared = squared[j]
            closest_coords_index = (i, j)
    return closest_coords_index
def find_coordinate_index(coordinates, target_coordinate):
    """Return the index of the first entry equal to ``target_coordinate``.

    Entries are compared element-wise with ``np.array_equal``, so NumPy
    arrays, lists and tuples are all accepted.

    Args:
        coordinates: Iterable of points to search.
        target_coordinate: The point to look for.

    Returns:
        The index of the first matching entry, or ``-1`` if there is none.
    """
    for i, coord in enumerate(coordinates):
        if np.array_equal(coord, target_coordinate):
            return i
    return -1
5. 边界轮廓点降采样(均匀)
def uniform_sampling_by_index(data, sample_size):
    """Down-sample ``data`` by keeping every ``step``-th row.

    ``step`` is ``len(data) // sample_size`` (at least 1), so the result
    holds roughly -- not necessarily exactly -- ``sample_size`` rows, and all
    rows are kept when ``sample_size`` exceeds ``len(data)``.

    Args:
        data: Array-like whose first axis is sampled.
        sample_size: Desired number of samples; must be positive.

    Returns:
        A NumPy array with the rows at indices ``0, step, 2 * step, ...``.

    Raises:
        ValueError: If ``sample_size`` is not positive.
    """
    if sample_size <= 0:
        raise ValueError("sample_size must be a positive integer")
    data = np.asarray(data)
    # Guard against a zero step, which np.arange cannot handle.
    step = max(1, len(data) // sample_size)
    indices = np.arange(0, len(data), step)
    return data[indices]
6. 坐标排序
如果轮廓数量大于2,在当前规则下(即按最近距离路径连接坐标),无法保证坐标路径不切割图像;此时下面的代码只处理前两个轮廓。若之后有更好的方法,再补充轮廓数量大于2的情况。
# Order the contour points along a nearest-distance path.
coordinates = []
if len(points) > 1:
    # Two contours (only points[0] and points[1] are used): start each tour
    # at the point where the two contours are closest to each other.
    closest_coordinates_index = find_closest_coordinates(points[0], points[1])
    # First contour.
    sorted_coordinate_a = tsp_greedy(points[0], closest_coordinates_index[0])
    coordinate_points_a = np.asarray(sorted_coordinate_a)
    coordinates.append(coordinate_points_a)
    # Second contour.
    sorted_coordinate_b = tsp_greedy(points[1], closest_coordinates_index[1])
    coordinate_points_b = np.asarray(sorted_coordinate_b)
    coordinates.append(coordinate_points_b)
    # Go back to the very first point of the first contour.
    coordinates.append(coordinates[0][0].reshape(1, 2))
    # Merge everything into one coordinate array.
    current_point = np.concatenate(coordinates)
elif len(points) == 1:
    # A single contour: halve very long contours (more than 8000 points)
    # before running the quadratic-time ordering.
    if len(points[0]) > 8000:
        sampled_data = uniform_sampling_by_index(points[0], int(len(points[0])*0.5))
    else:
        sampled_data = points[0]
    sorted_coordinate_a = tsp_greedy(sampled_data, 0)
    coordinate_points_a = np.asarray(sorted_coordinate_a)
    coordinates.append(coordinate_points_a)
    current_point = np.concatenate(coordinates)
else:
    # Without this branch current_point would stay undefined and the script
    # would fail later with an unrelated NameError.
    raise ValueError("no contours found; cannot build a coordinate path")
7. 匹配经纬度,并与坐标关联
通过正则表达式在字符串中匹配出需要的经纬度信息
由于我的经纬度信息在mif文件中,并且都在特定的字符后面,所以可以指定从特定的字符开始匹配:
# Read the whole MIF file into memory.
with open("./data/CZm5y2223border/CZm5y2223borderP3.mif", "r") as file:
    content = file.read()
# Four consecutive lines, each holding two numbers separated by whitespace.
pattern = r'[\d\.]+\s+[\d\.]+\n\s*[\d\.]+\s+[\d\.]+\s*\n\s*[\d\.]+\s+[\d\.]+\s*\n\s*[\d\.]+\s+[\d\.]+'
# Only search the text that follows the first occurrence of the marker ' 5'.
search_start = content.find(' 5')
if search_start == -1:
    # str.find returns -1 when the marker is missing; slicing from -1 would
    # silently search just the last character, so report "no match" instead.
    match = []
else:
    match = re.findall(pattern, content[search_start:])
if match:
    value = match
    # The pattern allows padding and blank lines between the rows, so strip
    # each line and drop empty ones before splitting.
    formatted_text = [line.strip() for line in value[0].split('\n') if line.strip()]
    df = pd.DataFrame(formatted_text)
    # Split on any run of whitespace (not just one space) into two columns.
    df[['longitude', 'latitude']] = df[0].str.split(expand=True)
    coordinate_df = df.iloc[:, 1:]
    print("提取的值:\n", coordinate_df)
else:
    # NOTE: coordinate_df is not defined on this path.
    print("未找到特定字符串")
# Pull the longitude/latitude bounds out of the parsed table.
# Convert to float first: min()/max() on the raw text columns would compare
# lexicographically (e.g. '9.5' > '10.2').
longitude_values = coordinate_df['longitude'].astype(float)
latitude_values = coordinate_df['latitude'].astype(float)
# idxmin/idxmax return the first row holding the extreme value, so this also
# works when several rows share the same latitude.
min_latitude_row = latitude_values.idxmin()
max_latitude_row = latitude_values.idxmax()
# NOTE(review): the longitude bounds are the longitudes of the rows with the
# lowest/highest latitude, not the numeric min/max longitude -- confirm that
# this matches the layout of the MIF corner points.
LONGITUDE_MIN = float(longitude_values.loc[min_latitude_row])
LATITUDE_MIN = float(latitude_values.loc[min_latitude_row])
LONGITUDE_MAX = float(longitude_values.loc[max_latitude_row])
LATITUDE_MAX = float(latitude_values.loc[max_latitude_row])
LONGITUDE_MIN,LATITUDE_MIN,LONGITUDE_MAX,LATITUDE_MAX
# Degrees per pixel along each image axis.
latitude_scale = (LATITUDE_MAX - LATITUDE_MIN) / flipped_ud_edges.shape[0]  # latitude span over the image height
longitude_scale = (LONGITUDE_MAX - LONGITUDE_MIN) / flipped_ud_edges.shape[1]  # longitude span over the image width
# Map the pixel coordinates of the ordered boundary points to degrees.
longitude_coordinates = LONGITUDE_MIN + longitude_scale * current_point[:,0]  # longitude
latitude_coordinates = LATITUDE_MIN + latitude_scale * current_point[:,1]  # latitude
8. 将关联好的轮廓坐标保存
# Pair every longitude with its latitude.
point_array = [list(pair) for pair in zip(longitude_coordinates, latitude_coordinates)]
# One tab-separated line per point, each terminated by a newline; built with
# join instead of repeated string concatenation.
table_str = "".join(
    "\t".join(str(item) for item in row) + "\n"
    for row in point_array
)
# Output file name.
filename = "./result/point_03.txt"
# Open the output file for writing; the with statement closes it afterwards.
with open(filename, "w") as file:
    file.write(table_str)
print("文件保存成功!")