大数据系统与大规模数据分析 之 作业三
问题描述
作业三:同步图运算编程
- 总体任务
- 实现SSSP的图运算
- 输入:图,v0
- 输出:顶点ID,最短路长度
SSSP编程
SSSP
定义:
- 单源最短路径Single Source Shortest Path
- 给定一个顶点v0,求v0到每个顶点的最短路径
伪代码:
function Dijkstra(Graph, source):
    dist[source] = 0                      // Distance from source to source
    prev[source] = undefined
    for each vertex v in Graph:
        if v != source:
            dist[v] = inf
            prev[v] = undefined
        add v to Q                        // Q = unvisited nodes
    while Q is not empty:
        u = vertex in Q with min dist[u]  // source node in first case
        remove u from Q
        for each neighbor v of u:
            alt = dist[u] + length(u, v)
            if alt < dist[v]:
                dist[v] = alt
                prev[v] = u
    return dist[], prev[]
Dijkstra的C++实现:
// Directed edge of the adjacency list: destination vertex index and edge length.
// Dijkstra's algorithm requires the length to be non-negative.
struct edge { int to, length; };

// Dijkstra's shortest-path search on an adjacency-list graph.
//
// graph  : graph[u] lists the outgoing edges of vertex u.
// source : index of the start vertex.
// target : index of the vertex whose distance is wanted.
//
// Returns the length of the shortest path from source to target, or INT_MAX
// when target is unreachable or when source/target is not a valid index.
int dijkstra(const std::vector< std::vector<edge> > &graph, int source, int target) {
    const int vertex_count = static_cast<int>(graph.size());
    // Reject indices that would address min_distance / graph out of bounds.
    if (source < 0 || source >= vertex_count || target < 0 || target >= vertex_count) {
        return INT_MAX;
    }

    std::vector<int> min_distance(graph.size(), INT_MAX);
    min_distance[source] = 0;

    // Ordered set of (tentative distance, vertex); begin() is always the
    // closest vertex that has not been settled yet.
    std::set< std::pair<int, int> > active_vertices;
    active_vertices.insert({0, source});

    while (!active_vertices.empty()) {
        const int where = active_vertices.begin()->second;
        if (where == target) {
            return min_distance[where];
        }
        active_vertices.erase(active_vertices.begin());

        for (const edge &ed : graph[where]) {
            // Ignore edges that point outside the graph instead of indexing
            // min_distance out of bounds.
            if (ed.to < 0 || ed.to >= vertex_count) {
                continue;
            }
            // Sum in 64 bits so a large length cannot overflow int.
            const long long candidate =
                static_cast<long long>(min_distance[where]) + ed.length;
            if (candidate < min_distance[ed.to]) {
                // The set has no decrease-key: drop the stale entry (a no-op
                // if the vertex was never queued) and insert the new one.
                active_vertices.erase({min_distance[ed.to], ed.to});
                // candidate < min_distance[ed.to] <= INT_MAX, so it fits in int.
                min_distance[ed.to] = static_cast<int>(candidate);
                active_vertices.insert({min_distance[ed.to], ed.to});
            }
        }
    }
    return INT_MAX;
}
同步图计算
- 图计算
- 同步图计算:
- 图计算模型:
- 图运算结束:Active 和 Inactive
- 所有顶点都变为Inactive时,结束
- 初始化所有顶点都为Active
- 图运算结束:Active 和 Inactive
- 系统架构:
- master分配,每个worker对应一个Graph partition
- 超步开始:master发布开始消息
- 超步计算:每个worker进行本地的计算,为本partition的每个顶点调用compute
- 超步结束:超步k的完成时间取决于最慢的一个worker完成本超步计算的时间
- 超步开始:k+1开始
- 特点:
- 需要进行多次的迭代
- 图计算模型:
- 异步图运算:
- 思路:允许不同顶点有不同的更新速度
- GraphLab:
- 共享内存,直接访问内存
- 同步图计算:
- Graphlite:
- GraphLite图计算框架属于BSP模型:
- GraphLite github地址 https://github.com/schencoding/GraphLite
- 图很适合进行分布式并行计算,比如最短路径,PageRank等问题
- 比较著名的图计算框架有Pregel、CMU的GraphLab、Apache的Giraph等。
- 系统函数:
- getValue:读
- mutableValue:修改
- sendMessageToAllNeighbors():发送给每个邻居同样的消息
- getOutEdgeIterator():发送不同值:
- 得到OutEdgeIterator,依次访问邻边,sendMessageTo()发送消息
- voteToHalt():
- superstep():获取当前超步数,从0开始计数
- 实现函数:
- Vertex:顶点、边、发送的消息都是double
- compute(msg);
- GraphLite图计算框架属于BSP模型:
程序源码
#include <stdio.h>
#include <string.h>
#include <math.h>
#include <limits>
#include <iostream>
using namespace std;
//#include <array>
//#include <list>
#include "GraphLite.h"
#define VERTEX_CLASS_NAME(name) SSSP##name
#define EPS 1e-6
#include<climits>
//#define INF (numeric_limits<int>::max())
#define INF INT_MAX
int v0_id; // source_id
// Input formatter for the SSSP job.
//
// Parses the vertex/edge counts from the header lines exposed by the base
// class and feeds every "<from> <to> [<weight>]" edge line to the framework
// through addEdge()/addVertex(). Vertex values, edge values and messages are
// all stored as double.
class VERTEX_CLASS_NAME(InputFormatter): public InputFormatter {
public:
    // Parses the vertex count from m_ptotal_vertex_line, stores it in
    // m_total_vertex and returns it.
    int64_t getVertexNum() {
        unsigned long long n = 0;  // stays 0 when the line cannot be parsed
        sscanf(m_ptotal_vertex_line, "%llu", &n);
        m_total_vertex = n;
        return m_total_vertex;
    }

    // Parses the edge count from m_ptotal_edge_line, stores it in
    // m_total_edge and returns it.
    int64_t getEdgeNum() {
        unsigned long long n = 0;  // stays 0 when the line cannot be parsed
        sscanf(m_ptotal_edge_line, "%llu", &n);
        m_total_edge = n;
        return m_total_edge;
    }

    // Size in bytes of a vertex value (the tentative distance, a double).
    int getVertexValueSize() {
        m_n_value_size = sizeof(double);
        return m_n_value_size;
    }

    // Size in bytes of an edge value (the edge weight, a double).
    int getEdgeValueSize() {
        m_e_value_size = sizeof(double);
        return m_e_value_size;
    }

    // Size in bytes of a message value (a candidate distance, a double).
    int getMessageValueSize() {
        m_m_value_size = sizeof(double);
        return m_m_value_size;
    }

    // Reads m_total_edge edge lines and registers them with the framework.
    //
    // A vertex is registered (with its out-degree) once the run of
    // consecutive lines sharing the same <from> id ends, so this relies on
    // the edge lines being grouped by source vertex.
    void loadGraph() {
        unsigned long long last_vertex = 0;
        unsigned long long from = 0;
        unsigned long long to = 0;
        double value = 1;  // placeholder vertex value passed to addVertex()
        int outdegree = 0;

        for (int64_t i = 0; i < m_total_edge; ++i) {
            const char *line = getEdgeLine();
            // Reset per line so an edge without a weight field gets 0 instead
            // of silently inheriting the previous edge's weight.
            double weight = 0;
            sscanf(line, "%llu %llu %lf", &from, &to, &weight);

            if (i > 0 && last_vertex != from) {
                // The previous source vertex has no more edges: emit it.
                addVertex(last_vertex, &value, outdegree);
                outdegree = 0;
            }
            last_vertex = from;
            ++outdegree;
            addEdge(from, to, &weight);
        }

        // Emit the final source vertex; nothing to emit for an empty edge list.
        if (m_total_edge > 0) {
            addVertex(last_vertex, &value, outdegree);
        }
    }
};
// Output formatter for the SSSP job: writes one "<vertex id>: <distance>"
// line per vertex in the result set.
class VERTEX_CLASS_NAME(OutputFormatter): public OutputFormatter {
public:
    // Iterates over all results and emits each one through writeNextResLine().
    // The distance is printed as an integer, i.e. truncated towards zero.
    void writeResult() {
        int64_t vid = 0;
        double value = 0;
        char line[1024];

        for (ResultIterator r_iter; !r_iter.done(); r_iter.next()) {
            r_iter.getIdValue(vid, &value);
            // snprintf bounds the write to the buffer; the argument casts
            // match the %lld / %d conversions exactly.
            const int n = snprintf(line, sizeof(line), "%lld: %d\n",
                                   static_cast<long long>(vid),
                                   static_cast<int>(value));
            writeNextResLine(line, n);
        }
    }
};
// Sum aggregator over double values: keeps a local partial sum (m_local)
// and a global sum (m_global).
class VERTEX_CLASS_NAME(Aggregator): public Aggregator<double> {
public:
    // Resets both the local and the global sum to zero.
    void init() {
        m_local = 0;
        m_global = 0;
    }

    // Returns a pointer to the global sum.
    void* getGlobal() {
        return &m_global;
    }

    // Overwrites the global sum with the double pointed to by p.
    void setGlobal(const void* p) {
        const double incoming = *static_cast<const double*>(p);
        m_global = incoming;
    }

    // Returns a pointer to the local partial sum.
    void* getLocal() {
        return &m_local;
    }

    // Adds the double pointed to by p to the global sum.
    void merge(const void* p) {
        const double partial = *static_cast<const double*>(p);
        m_global += partial;
    }

    // Adds the double pointed to by p to the local partial sum.
    void accumulate(const void* p) {
        const double contribution = *static_cast<const double*>(p);
        m_local += contribution;
    }
};
// SSSP vertex program. Vertex value, edge value and message value are all
// double: the vertex value is the best known distance from the source vertex
// v0_id, and each message carries a candidate distance for the receiver.
class VERTEX_CLASS_NAME(): public Vertex <double, double, double> {
public:
    // One superstep of single-source shortest path.
    //
    // pmsgs: iterator over the candidate distances sent to this vertex in the
    //        previous superstep.
    //
    // The vertex takes the minimum of its baseline (0 for the source, INF for
    // every other vertex) and all incoming candidates. When that improves the
    // stored distance (or in superstep 0, where the stored value is only the
    // loader's placeholder) it stores the new distance and sends
    // distance + edge weight along every outgoing edge. The vertex then votes
    // to halt; termination is driven by that vote alone.
    void compute(MessageIterator* pmsgs) {
        // Kept as double so fractional edge weights are not truncated.
        const double unreachable = INF;
        double best = (getVertexId() == v0_id) ? 0.0 : unreachable;

        for ( ; !pmsgs->done(); pmsgs->next()) {
            const double candidate = pmsgs->getValue();
            if (candidate < best) {
                best = candidate;
            }
        }

        if (getSuperstep() == 0 || best < getValue()) {
            *mutableValue() = best;

            // A vertex that is still unreachable has nothing useful to offer
            // its neighbours; skipping it saves messages and keeps INF + w
            // from ever looking like a real distance.
            if (best < unreachable) {
                OutEdgeIterator out_edge = getOutEdgeIterator();
                for ( ; !out_edge.done(); out_edge.next()) {
                    // The target id is passed straight through as an integer
                    // id rather than being routed through a double.
                    sendMessageTo(out_edge.target(), best + out_edge.getValue());
                }
            }
        }

        voteToHalt();
    }
};
// Job description for the SSSP computation: host layout, input/output paths,
// source vertex id and aggregator registration.
class VERTEX_CLASS_NAME(Graph): public Graph {
public:
    // Array of one aggregator, allocated in init() and released in term().
    VERTEX_CLASS_NAME(Aggregator)* aggregator;

public:
    // argv[0]: vertex program shared object (.so)
    // argv[1]: <input path>
    // argv[2]: <output path>
    // argv[3]: <source-id>
    //
    // Configures the five localhost hosts, validates the argument count,
    // records the paths and the source vertex id (global v0_id) and registers
    // the aggregator. Prints a usage line and exits with status 1 when an
    // argument is missing.
    void init(int argc, char* argv[]) {
        setNumHosts(5);
        setHost(0, "localhost", 1411);
        setHost(1, "localhost", 1421);
        setHost(2, "localhost", 1431);
        setHost(3, "localhost", 1441);
        setHost(4, "localhost", 1451);

        // argv[3] is read below, so all four entries must be present.
        if (argc < 4) {
            printf ("Usage: %s <input path> <output path> <source-id>\n", argv[0]);
            exit(1);
        }

        m_pin_path = argv[1];
        m_pout_path = argv[2];
        v0_id = atoi(argv[3]);

        aggregator = new VERTEX_CLASS_NAME(Aggregator)[1];
        regNumAggr(1);
        regAggr(0, &aggregator[0]);
    }

    // Releases the aggregator array allocated by init().
    void term() {
        delete[] aggregator;
    }
};
/* STOP: do not change the code below. */
// Factory entry point exported with C linkage.
// Allocates the SSSP graph object and attaches a newly allocated input
// formatter, output formatter and vertex object (stored in m_pver_base) to it.
// Returns the graph through a base-class pointer; destroy_graph() frees
// everything allocated here.
extern "C" Graph* create_graph() {
Graph* pgraph = new VERTEX_CLASS_NAME(Graph);
pgraph->m_pin_formatter = new VERTEX_CLASS_NAME(InputFormatter);
pgraph->m_pout_formatter = new VERTEX_CLASS_NAME(OutputFormatter);
pgraph->m_pver_base = new VERTEX_CLASS_NAME();
return pgraph;
}
// Counterpart of create_graph(), exported with C linkage.
// Deletes the vertex object, the output formatter and the input formatter
// attached to pobject, then pobject itself. Each pointer is cast back to its
// concrete SSSP type before being deleted.
extern "C" void destroy_graph(Graph* pobject) {
delete ( VERTEX_CLASS_NAME()* )(pobject->m_pver_base);
delete ( VERTEX_CLASS_NAME(OutputFormatter)* )(pobject->m_pout_formatter);
delete ( VERTEX_CLASS_NAME(InputFormatter)* )(pobject->m_pin_formatter);
delete ( VERTEX_CLASS_NAME(Graph)* )pobject;
}