《算法导论》dijkstra算法的图,怎么图是歪的?顺便治疗一下颈椎病,嘎嘎!
计算机程序很笨,一开始它看不到整个图,只能从起点开始,慢慢的探索周围的点,由近及远,所以它是一个广度优先搜索。
很自然的,程序刚开始,要把所有点的distance值(从起点到该点的距离)设置为无穷大,意味所有点都是不可达的状态。 然后,我们要把起点的distance值设置为0,并将起点放入一个集合(这个集合可以采用优先队列,队首元素就是distance值最小的那个结点)。接下来,程序进入循环,循环里面做3件事:
(1)从当前集合里找到distance值最小的点(队首元素),准备进行处理。
(2)更新这个点的所有邻接点的distance值,更新的条件是什么呢?假设这个点是A点,它的distance值是50(从起点到A点的距离值是50),它有一个邻接点是B点,此前B点的distance值是100(从起点到B点的距离值是100),又假设A到B的距离只有10,那么我们发现:经过A到达B,有一条比之前100更短的路径,即50+10=60,这时候将B的distance值更新为60,意味着我们找到了一条从起点到达B点的更短的路径。(将这个点的邻接点放入集合,这一个操作要不要,取决于集合是怎么定义的)
(3)将这个处理过的A点从集合中删除(队首元素出队)。
这个算法有两个关键的事情:一是这个集合里的点的个数是如何增减的,二是如何在这个集合里找到distance值最小的点。
做到这两件事不难,但是要保证程序运行效率,需要考虑一些事。
第一,找最小值,用什么方法?所有数据排序?这种排序的目的是“让全体数据有序”,但实际上我们不需要这种“有序”,我们只需要找到那个最小的值就行。用遍历一遍的方法,找到最小值?效率还是低了。最后我们用小根堆来找到最小值,C++里可以自己建堆、调整堆,但是,可以直接用优先队列,速度还要快一点,优先队列它也是用堆实现的。
第二,集合如何定义?这个集合里装的是未处理过的结点,还是待处理的结点?这两者是有区别的。
下面使用老美网站上的一个数据集来测试最短路径程序。网址是:9th DIMACS Implementation Challenge: Shortest Paths
直接使用比较大的道路数据集,1400多万结点,3400多万条边。
先用C++的map,将原始的边数据转为邻接矩阵(这个代码略),而且分多个文件存储,回头可以多线程读取文件,加快文件读取速度,点的数据也分为多个文件存储。
上图数据的解释:从点0到点2516544有一条路径,长度是2025米,从点0到点576739有一条路径,长度是2617米,从点0到点1520470有一条路径,长度是520,也就是说从点0出去有3条路径;然后,从点1到点524282有一条路径,长度是2359……
程序运行结果,读文件到内存1.4秒,找到一个点到其它1400多万个点的最短路径,大概用时1.6秒:
路径有点长,所以保存在一个path.txt文件中:
代码如下:
#include <vector>
#include <queue>
#include <thread>
#include <limits>
#include <iostream>
using namespace std;
#define THREADNUM 14
#define PINFINITY INT_MAX
class CVertex
{
public:
int id;
int distance; //从源点到该点的距离
int parent; //记录最短路径经过的结点
public:
CVertex(int _id = -1, int _distance = -1, int _parent = -1)
{
id = _id;
distance = _distance;
parent = _parent;
}
void operator=(const CVertex& vertex)
{
id = vertex.id;
distance = vertex.distance;
parent = vertex.parent;
}
bool operator<(const CVertex& vertex)
{
return this->distance < vertex.distance;
}
~CVertex()
{
}
};
class CVexIdAndDistance
{
public:
int id;
int distance;
public:
CVexIdAndDistance(int _id = -1, int _distance = -1)
{
id = _id;
distance = _distance;
}
bool operator==(const CVexIdAndDistance& vexIdAndDistance)
{
if (id == vexIdAndDistance.id && distance == vexIdAndDistance.distance)
{
return true;
}
else
{
return false;
}
}
~CVexIdAndDistance()
{
}
};
class CEdge
{
public:
int toVexId;
int weight;
public:
CEdge(int _toVexId = -1, int _weight = 0)
{
toVexId = _toVexId;
weight = _weight;
}
};
class Cmp
{
public:
bool operator() (const CVexIdAndDistance& a, const CVexIdAndDistance& b)
{
return a.distance > b.distance;
}
};
int GetVertexNumThread(const char* vertexInfoFilePath, int* vertexNumPerFile, int tid);
int GetEdgeNumThread(const char* edgeInfoFilePath, int* edgeNum, int tid);
int GetVertexAndEdgeInfoThread(const char* vertexInfoFilePath, const char* edgeInfoFilePath, CVertex* vertex, CEdge** edge, int tid);
int UpdateDistance(CVertex* vertex, priority_queue<CVexIdAndDistance, vector<CVexIdAndDistance>, Cmp>& priorityQueueForSort, const int* edgeNum, const CEdge* const* edge, int srcVexId, int dstVexId);
int GetPath(const CVertex* vertex, int srcVexId, int dstVexId);
char vertexInfoFilePath[50] = "e:\\USA-road-d.Central\\vertex";
char edgeInfoFilePath[50] = "e:\\USA-road-d.Central\\edge";
int totalVertexNum = 0;
int vertexNumPerThread = 0;
int main()
{
clock_t c[10] = { 0 };
c[0] = clock();
cout << THREADNUM << "线程从文件读取点和边数据" << endl;
int all_vertex_file_size = 0;
long long int all_edge_file_size = 0;
FILE* fp = NULL;
for (int i = 0; i < THREADNUM; i++)
{
char vertexInfoFileName[50] = "";
strcat_s(vertexInfoFileName, vertexInfoFilePath);
char temp[5] = "";
_itoa_s(i, temp, 10);
strcat_s(vertexInfoFileName, temp);
strcat_s(vertexInfoFileName, ".txt");
fopen_s(&fp, vertexInfoFileName, "rb");
if (fp == NULL)
return -1;
_fseeki64(fp, 0, SEEK_END);
long long int filesize = _ftelli64(fp);
all_vertex_file_size += filesize;
if (fp != NULL)
fclose(fp);
char edgeInfoFileName[50] = "";
strcat_s(edgeInfoFileName, edgeInfoFilePath);
_itoa_s(i, temp, 10);
strcat_s(edgeInfoFileName, temp);
strcat_s(edgeInfoFileName, ".txt");
fopen_s(&fp, edgeInfoFileName, "rb");
if (fp == NULL)
return -1;
_fseeki64(fp, 0, SEEK_END);
filesize = _ftelli64(fp);
all_edge_file_size += filesize;
if (fp != NULL)
fclose(fp);
}
cout << "点文件大小:" << all_vertex_file_size / 1024.0 / 1024 / 1024 << " GB" << endl;
cout << "边文件大小:" << all_edge_file_size / 1024.0 / 1024 / 1024 << " GB" << endl;
int vertexNumPerFile[THREADNUM] = { 0 };
thread td0[THREADNUM];
for (int i = 0; i < THREADNUM; i++)
{
td0[i] = thread(&GetVertexNumThread, vertexInfoFilePath, vertexNumPerFile, i);
}
for (int i = 0; i < THREADNUM; i++)
{
td0[i].join();
}
for (int i = 0; i < THREADNUM; i++)
{
totalVertexNum += vertexNumPerFile[i];
}
vertexNumPerThread = int((totalVertexNum / 1000000) / THREADNUM) * 1000000;
CVertex* vertex = new CVertex[totalVertexNum];
int* edgeNum = new int[totalVertexNum];
memset(edgeNum, -1, totalVertexNum);
int totalEdgeNum = 0;
int srcVexId = -1, dstVexId = -1;
thread td1[THREADNUM];
for (int i = 0; i < THREADNUM; i++)
{
td1[i] = thread(&GetEdgeNumThread, edgeInfoFilePath, edgeNum, i);
}
for (int i = 0; i < THREADNUM; i++)
{
td1[i].join();
}
for (int i = 0; i < totalVertexNum; i++)
{
totalEdgeNum += edgeNum[i];
}
CEdge** edge = new CEdge * [totalVertexNum];
if (edge == NULL)
{
return -1;
}
for (int i = 0; i < totalVertexNum; i++)
{
edge[i] = NULL;
}
for (int i = 0; i < totalVertexNum; i++)
{
if (edgeNum[i] != 0)
{
edge[i] = new CEdge[edgeNum[i]];
if (edge[i] == NULL)
{
return -1;
}
}
else
edge[i] = NULL;
}
thread td2[THREADNUM];
for (int i = 0; i < THREADNUM; i++)
{
td2[i] = thread(&GetVertexAndEdgeInfoThread, vertexInfoFilePath, edgeInfoFilePath, vertex, edge, i);
}
for (int i = 0; i < THREADNUM; i++)
{
td2[i].join();
}
cout << "图中点数:" << totalVertexNum << endl;
cout << "图中边数:" << totalEdgeNum << endl;
c[1] = clock();
cout << "用时:" << c[1] - c[0] << "毫秒" << endl;
cout << "----------------------------------------" << endl;
c[2] = clock();
int maxOutDegree = 0;
int imax = 0;
for (int i = 0; i < totalVertexNum; i++)
{
if (edgeNum[i] > maxOutDegree)
{
maxOutDegree = edgeNum[i];
imax = i;
}
}
cout << "顶点" << imax << "具有最大出度:" << maxOutDegree << endl;
c[3] = clock();
cout << "用时:" << c[3] - c[2] << "毫秒" << endl;
cout << "----------------------------------------" << endl;
cout << "输入起点ID:";
cin >> srcVexId;
c[4] = clock();
vertex[srcVexId].distance = 0;
priority_queue<CVexIdAndDistance, vector<CVexIdAndDistance>, Cmp> priorityQueueForSort;
priorityQueueForSort.push(CVexIdAndDistance(srcVexId, 0));
UpdateDistance(vertex, priorityQueueForSort, edgeNum, edge, srcVexId, dstVexId);
c[5] = clock();
cout << "计算起点" << srcVexId << "到其它所有点的最短路径用时:" << c[5] - c[4] << "毫秒" << endl;
cout << "----------------------------------------" << endl;
while (true)
{
cout << "输入终点ID:";
cin >> dstVexId;
if (dstVexId == -1)
{
break;
}
c[6] = clock();
GetPath(vertex, srcVexId, dstVexId);
c[7] = clock();
cout << "用时:" << c[7] - c[6] << "毫秒" << endl;
cout << "----------------------------------------" << endl;
}
return 0;
}
int GetVertexNumThread(const char* vertexInfoFilePath, int* vertexNumPerFile, int tid)
{
char vertexInfoFileName[50] = "";
strcat_s(vertexInfoFileName, vertexInfoFilePath);
char temp[5] = "";
_itoa_s(tid, temp, 10);
strcat_s(vertexInfoFileName, temp);
strcat_s(vertexInfoFileName, ".txt");
FILE* fp;
fopen_s(&fp, vertexInfoFileName, "rb");
if (fp == NULL)
return -1;
char* line = new char[100];
while (!feof(fp))
{
fgets(line, 100, fp);
vertexNumPerFile[tid]++;
}
}
int GetEdgeNumThread(const char* edgeInfoFilePath, int* edgeNum, int tid)
{
char edgeInfoFileName[50] = "";
strcat_s(edgeInfoFileName, edgeInfoFilePath);
char temp[5] = "";
_itoa_s(tid, temp, 10);
strcat_s(edgeInfoFileName, temp);
strcat_s(edgeInfoFileName, ".txt");
FILE* fp = NULL;
fopen_s(&fp, edgeInfoFileName, "r");
if (fp == NULL)
return -1;
char* line = new char[totalVertexNum * 2];
int i, j;
int lineIndex = 0;
int tokenIndex = 0;
while (!feof(fp))
{
fgets(line, totalVertexNum * 2, fp);
tokenIndex = 0;
i = 0;
while (line[i] != '\0')
{
if (line[i] >= '0' && line[i] <= '9')
{
j = 0;
while (line[i] >= '0' && line[i] <= '9')
{
i++;
}
tokenIndex++;
}
else
{
i++;
}
}
edgeNum[vertexNumPerThread * tid + lineIndex] = tokenIndex / 2;
lineIndex++;
}
}
int GetVertexAndEdgeInfoThread(const char* vertexInfoFilePath, const char* edgeInfoFilePath, CVertex* vertex, CEdge** edge, int tid)
{
char vertexInfoFileName[50] = "";
strcat_s(vertexInfoFileName, vertexInfoFilePath);
char temp[5] = "";
_itoa_s(tid, temp, 10);
strcat_s(vertexInfoFileName, temp);
strcat_s(vertexInfoFileName, ".txt");
FILE* fp = NULL;
fopen_s(&fp, vertexInfoFileName, "rb");
if (fp == NULL)
return -1;
char* line = new char[totalVertexNum * 2];
char t[50];
int tokenIndex = 0;
while (!feof(fp))
{
fgets(line, totalVertexNum * 2, fp);
int i = 0;
int j;
while (line[i] != '\0')
{
if (line[i] >= '0' && line[i] <= '9')
{
j = 0;
while (line[i] >= '0' && line[i] <= '9')
{
t[j++] = line[i++];
}
t[j] = '\0';
vertex[vertexNumPerThread * tid + tokenIndex].id = atoi(t);
tokenIndex++;
}
else
{
i++;
}
}
}
if (tid < THREADNUM - 1)
{
for (int i = 0; i < vertexNumPerThread; i++)
{
vertex[vertexNumPerThread * tid + i].distance = PINFINITY;
}
}
else
{
for (int i = 0; i < totalVertexNum - vertexNumPerThread * THREADNUM + vertexNumPerThread; i++)
{
vertex[vertexNumPerThread * tid + i].distance = PINFINITY;
}
}
if (fp != NULL)
fclose(fp);
char edgeInfoFileName[50] = "";
strcat_s(edgeInfoFileName, edgeInfoFilePath);
_itoa_s(tid, temp, 10);
strcat_s(edgeInfoFileName, temp);
strcat_s(edgeInfoFileName, ".txt");
fopen_s(&fp, edgeInfoFileName, "rb");
if (fp == NULL)
return -1;
int i, j;
int lineIndex = 0;
tokenIndex = 0;
while (!feof(fp))
{
fgets(line, totalVertexNum * 2, fp);
tokenIndex = 0;
i = 0;
while (line[i] != '\0')
{
if (line[i] >= '0' && line[i] <= '9')
{
j = 0;
while (line[i] >= '0' && line[i] <= '9')
{
t[j++] = line[i++];
}
t[j] = '\0';
if (tokenIndex % 2 == 0)
{
edge[vertexNumPerThread * tid + lineIndex][tokenIndex / 2].toVexId = atoi(t);
}
else
{
edge[vertexNumPerThread * tid + lineIndex][tokenIndex / 2].weight = atoi(t);
}
tokenIndex++;
}
else
{
i++;
}
}
lineIndex++;
}
if (fp != NULL)
fclose(fp);
return 0;
}
int UpdateDistance(CVertex* vertex, priority_queue<CVexIdAndDistance, vector<CVexIdAndDistance>, Cmp>& priorityQueueForSort, const int* edgeNum, const CEdge* const* edge, int srcVexId, int dstVexId)
{
CVexIdAndDistance currentVertex;
vector<CVexIdAndDistance>::iterator iter;
while (true)
{
if (priorityQueueForSort.size() == 0)
{
break;
}
currentVertex = priorityQueueForSort.top();
priorityQueueForSort.pop();
for (int i = 0; i < edgeNum[currentVertex.id]; i++)
{
if (edgeNum[currentVertex.id] == 0)
{
continue;
}
int toVexId = edge[currentVertex.id][i].toVexId;
if (currentVertex.distance + edge[currentVertex.id][i].weight < vertex[toVexId].distance)
{
vertex[toVexId].distance = currentVertex.distance + edge[currentVertex.id][i].weight;
vertex[toVexId].parent = currentVertex.id;
priorityQueueForSort.push(CVexIdAndDistance(toVexId, vertex[toVexId].distance));
}
}
}
return 0;
}
int GetPath(const CVertex* vertex, int srcVexId, int dstVexId)
{
if (vertex[dstVexId].distance == PINFINITY)
{
cout << endl << "no path to the vertex " << dstVexId << endl;
return -1;
}
int* path = new int[totalVertexNum];
memset(path, -1, totalVertexNum);
int i = 0;
int tid = dstVexId;
while (tid != -1)
{
path[i++] = tid;
tid = vertex[tid].parent;
if (tid == srcVexId)
{
break;
}
}
path[i] = tid;
for (int j = 0; j < (i + 1) / 2; j++)
{
int t = path[j];
path[j] = path[i - j];
path[i - j] = t;
}
FILE* fp = NULL;
fopen_s(&fp, "e:\\path.txt", "w");
if (fp == NULL)
return -1;
cout << "从起点" << srcVexId << "到终点" << dstVexId << "的最短路径长度=" << vertex[dstVexId].distance << endl;
fprintf(fp, "起点%d到终点%d的最短路径: %d\n", srcVexId, dstVexId, vertex[dstVexId].distance);
fprintf(fp, "最短路径一共经过%d个顶点(包括起点和终点):\n", i + 1);
for (int j = 0; j <= i; j++)
{
fprintf(fp, "vertex id:%d\n", path[j]);
}
if (fp != NULL)
fclose(fp);
return 0;
}