引言
哈密顿路径(Hamiltonian Path)
是图论中的一个概念,它指的是一条通过图中每个顶点一次且仅一次的路径。具体而言,对于一个图 G,如果存在一条最短路路径,使得路径上的每个顶点都不重复,且路径经过图中的每个顶点一次,那么这条路径就是图 G 的哈密顿路径。
一个哈密顿路径是图的遍历,它访问每个节点一次,且不重复,形成一条路径。特别地,如果哈密顿路径的起点和终点相同,形成一个回路,那么这就是哈密顿回路。
哈密顿路径和哈密顿回路在图论中是重要的问题,与著名的旅行商问题(Traveling Salesman Problem)
相关。在旅行商问题中,目标是找到一条经过所有城市一次且最短的路径。哈密顿路径的存在性和寻找算法对于解决旅行商问题提供了一些思路。
文章定义了一个名为Hamilton的类,结合了networkx
、geopandas
和matplotlib
这几个 Python 库,通过这些库实现了哈密顿路径的生成并可视化。
更新一下(2024…5.28):考虑到数据太多,使用networkx库地计算时间成本较高,所以用矩阵运算尝试计算哈密顿路径。
构建图
我们自定义几个点存入gdf
,然后根据gdf
构建图(G)
,图中的节点信息来自gdf的id
,而边的权重则根据点之间的距离确定。
import geopandas as gpd
import networkx as nx
import matplotlib.pyplot as plt
from shapely.geometry import Point
# 创建一个示例的geodataframe
data = {'id': ['P1', 'P2', 'P3', 'P4', 'P5', 'P6'],
'geometry': [Point(3, 2), Point(5, 3), Point(0, 1), Point(5, 7), Point(1, 3), Point(2, 1)]}
gdf = gpd.GeoDataFrame(data, crs='EPSG:4326')
# 构建无向图
G = nx.Graph()
# 添加点到图中
for index, row in gdf.iterrows():
G.add_node(index, pos=(row['geometry'].x, row['geometry'].y))
# 添加边到图中(在这个示例中,任意两点之间都有一条边)
for i in range(len(gdf)):
for j in range(i + 1, len(gdf)):
distance = round(gdf.geometry.iat[i].distance(gdf.geometry.iat[j]),2)
G.add_edge(i, j, weight=distance)
# 绘制图
pos = nx.get_node_attributes(G, 'pos')
labels = {index: str(gdf['id'].iloc[index]) for index in range(len(gdf))}
nx.draw(G,
pos,
labels=labels,
with_labels=True,
font_weight='bold',
node_size=200,
node_color='lightblue',
font_size=10,
font_color='black',
edge_color='gray',
linewidths=1,
alpha=0.7)
# 添加边的权重标签
edge_labels = nx.get_edge_attributes(G, 'weight')
nx.draw_networkx_edge_labels(G, pos, edge_labels=edge_labels)
plt.show()
定义Hamilton类
class Hamilton:
def __init__(self, gdf):
self.gdf = gdf
self.G = None
self.hamilton_nodes = None
def generating_graph(self):
self.G = nx.Graph()
# 添加点到图中
for index, row in self.gdf.iterrows():
self.G.add_node(index, pos=(row['geometry'].x, row['geometry'].y))
# 添加边到图中(在这个示例中,任意两点之间都有一条边)
for i in range(len(self.gdf)):
for j in range(i + 1, len(self.gdf)):
distance = round(self.gdf.geometry.iat[i].distance(self.gdf.geometry.iat[j]), 2)
self.G.add_edge(i, j, weight=distance)
return self.G
def points_order(self):
self.generating_graph()
# 根据边确定最长的两个点为起始点
max_edge = max(self.G.edges(data=True), key=lambda x: x[2]['weight'])
start_node, end_node, weight = max_edge
self.hamilton_nodes = [start_node,end_node]
# 循环根据新点的插入系列获取最短路径,直到所有点遍历结束
while len(self.hamilton_nodes)<len(self.G.nodes):
other_node = [node for node in self.G.nodes if node not in self.hamilton_nodes][0]
all_permutations = self.generating_permutation(self.hamilton_nodes, other_node)
self.hamilton_nodes = min(all_permutations, key=self.calculate_distance)
return self.hamilton_nodes
def generating_permutation(self, lst, element):
result = []
# 遍历列表的每个位置(索引)
for index in range(len(lst) + 1):
# 创建一个新的列表,在指定位置插入元素
new_lst = lst.copy()
new_lst.insert(index, element)
result.append(new_lst) # 将新列表添加到结果中
return result
def calculate_distance(self, nodes):
total_distance = 0
for i in range(len(nodes) - 1):
distance = self.G.get_edge_data(nodes[i], nodes[i+1])['weight']
total_distance += distance
return total_distance
def plot_hamilton_path(self):
self.generating_graph()
self.points_order()
pos = nx.get_node_attributes(self.G, 'pos')
labels = {index: str(self.gdf[str(self.ID)].iloc[index]) for index in self.hamilton_nodes}
nx.draw_networkx_nodes(self.G, pos, node_size=200, node_color='lightblue')
nx.draw_networkx_labels(self.G, pos, labels=labels, font_size=8, font_weight='bold', font_color='black')
hamilton_edges = [(self.hamilton_nodes[i], self.hamilton_nodes[i+1]) for i in range(len(self.hamilton_nodes)-1)]
nx.draw_networkx_edges(self.G, pos, edgelist=hamilton_edges, edge_color='red', width=2)
plt.show()
# 调用类
hamilton_instance = Hamilton(gdf)
graph = hamilton_instance.generating_graph()
ordered_nodes = hamilton_instance.points_order()
hamilton_instance.plot_hamilton_path()
print("Hamiltonian Nodes:", ordered_nodes)
print("Hamiltonian IDs:", gdf.loc[ordered_nodes]['id'].to_list())
Hamiltonian Nodes: [2, 4, 5, 0, 1, 3]
Hamiltonian IDs: ['P3', 'P5', 'P6', 'P1', 'P2', 'P4']
纯矩阵运算
def get_distance_matrix(points):
"""
使用矩阵计算点之间的欧氏距离
输入格式为 df[x,y]
"""
n = points.shape[0] # 获取数据长度
tiled_points = np.tile(points, (n, 1, 1)) # 将数组points沿其第一个轴以n倍的方式进行复制拼接,而在其余轴上则保持原样: n*n*2
# 原数组tiled_points的第二个轴(行)变为新数组的第一个轴(列);
# 原数组tiled_points的第一个轴(列)变为新数组的第二个轴(行);
# 原数组tiled_points的第三个轴保持不变,仍然是新数组的第三个轴。
# n*n*2
tiled_points_T = np.transpose(tiled_points, axes=(1, 0, 2))
distance_matrix = np.linalg.norm(tiled_points - tiled_points_T, axis=2)
return distance_matrix
# 左到右以此插入序列,可以得到的序列组合
def get_insert_sequences(node, nodes_list):
insert_sequences = []
for i in range(len(nodes_list) + 1):
sequence = nodes_list.copy() # 复制新的节点列表,避免在原列表上进行修改
sequence.insert(i, node) # 在指定位置插入节点
insert_sequences.append(sequence)
return insert_sequences
# 贪婪算法获取最短路径
def get_shortest_sequence(df):
matrix = get_distance_matrix(df)
distance_df = pd.DataFrame(data=matrix, index=df.index, columns=df.index)
# 找到距离最大的两个点
max_points = np.unravel_index(np.argmax(matrix), matrix.shape)
# 初始化最短路径长度为最大值
start_node, end_node = max_points
shortest_length_nodes = [start_node, end_node]
nodes = [node for node in distance_df.index if node != start_node and node != end_node]
shortest_sequence = shortest_length_nodes.copy()
while len(nodes) > 0:
# 选择插入节点
node = np.random.choice(nodes)
nodes.remove(node)
# 生成插入序列
insert_sequences = get_insert_sequences(node, shortest_sequence)
# 初始化参数
shortest_length = np.inf
# 寻找最短路径
for sequence in insert_sequences:
sequence_length = 0
for j in range(len(sequence) - 1):
# 使用距离矩阵中的距离值
sequence_length += matrix[sequence[j]][sequence[j+1]]
if sequence_length < shortest_length:
shortest_length = sequence_length
shortest_sequence = sequence
return shortest_sequence
eIDs = []
poi_documents = []
for eID in tqdm(poi1.eID.unique(), desc='get shortest length'):
group = poi1.query(f'eID=={eID}').copy().reset_index(drop=True)
# 放大坐标,避免小数位数太多造成精度问题,导致结果不一致
group['wgs84lon'] = group['wgs84lon'] * 1000
group['wgs84lat'] = group['wgs84lon'] * 1000
eIDs.append(eID)
poi_documents.append(list(group.loc[get_shortest_sequence(group[['wgs84lon','wgs84lat']])]['中类']))
Docs = pd.DataFrame(data={'eID':eIDs,'Doc':poi_documents})
Docs.to_csv('data/data_processing/地理语料库_taz.csv', index=False)