大数据系统与大规模数据分析之作业三

最新推荐文章于 2024-05-05 17:47:17 发布

a550461053

最新推荐文章于 2024-05-05 17:47:17 发布

阅读量4k

点赞数 1

分类专栏：大数据算法文章标签：大数据同步图计算 SSSP GraphLite

本文链接：https://blog.csdn.net/a550461053/article/details/73743328

版权

大数据同时被 2 个专栏收录

3 篇文章 0 订阅

订阅专栏

算法

1 篇文章 0 订阅

订阅专栏

大数据系统与大规模数据分析之作业三

大数据系统与大规模数据分析之作业三

问题描述

作业三：同步图运算编程

总体任务
- 实现SSSP的图运算
- 输入：图，v0
- 输出：顶点ID，最短路长度

SSSP编程

SSSP

定义:
- 单源最短路径Single Source Shortest Path
- 给定一个顶点v0，求v0到每个顶点的最短路径

伪代码：

function Dijkstra(Graph, source):
    dist[source] = 0 //Distance from source to source
    prev[source] = undefined
    for each vertex v in Graph:
        if v != source:
            dist[v] = inf
            prev[v] = undefined
        add v to Q // Q = unvisited nodes
    while Q is not empty:
        u = vertex in Q with min dist[u] // source node in first case
        remove u from Q
        for each neighbor v of u: 
            alt = dist[u] + length(u, v)
            if alt < dist[v]:
                dist[v] = alt
                prev[v] = u
    return dist[], prev[]

Dijkstra的C++实现：

/// A directed edge: `to` is the index of the destination vertex, `length` its weight.
struct edge { int to, length; };

/// Length of the shortest path from `source` to `target` (Dijkstra, ordered-set variant).
///
/// @param graph  Adjacency list; graph[v] holds the outgoing edges of vertex v.
///               Every edge's `to` must be a valid index into `graph`, and
///               lengths are expected to be non-negative.
/// @param source Index of the start vertex.
/// @param target Index of the destination vertex.
/// @return The shortest distance, or INT_MAX when `target` cannot be reached,
///         when `source` is not a valid vertex (this covers the empty graph),
///         or when the only paths are too long to be represented in an int.
int dijkstra(const std::vector< std::vector<edge> > &graph, int source, int target) {
    // An invalid source would index min_distance out of bounds below.
    if (source < 0 || static_cast<std::size_t>(source) >= graph.size())
        return INT_MAX;

    std::vector<int> min_distance( graph.size(), INT_MAX );
    min_distance[ source ] = 0;

    // Ordered by (distance, vertex), so begin() is always the closest open vertex.
    std::set< std::pair<int,int> > active_vertices;
    active_vertices.insert( {0,source} );

    while (!active_vertices.empty()) {
        const int where = active_vertices.begin()->second;
        if (where == target) return min_distance[where];
        active_vertices.erase( active_vertices.begin() );
        for (const auto &ed : graph[where]) {
            // Widen before adding so a long path cannot overflow int; a sum
            // above INT_MAX can never beat the stored distance and is dropped.
            const long long candidate =
                static_cast<long long>(min_distance[where]) + ed.length;
            if (candidate < min_distance[ed.to]) {
                // Re-key the vertex: remove the stale entry (if any) first.
                active_vertices.erase( { min_distance[ed.to], ed.to } );
                min_distance[ed.to] = static_cast<int>(candidate);
                active_vertices.insert( { min_distance[ed.to], ed.to } );
            }
        }
    }
    return INT_MAX;
}

同步图计算

图计算
- 同步图计算：
  - 图计算模型:
    - 图运算结束:Active 和 Inactive
      - 所有顶点都变为Inactive时,结束
      - 初始化所有顶点都为Active
  - 系统架构:
    - master分配,每个worker对应一个Graph partition
    - 超步开始:master发布开始消息
    - 超步计算:每个worker进行本地的计算,为本partition的每个顶点调用compute
    - 超步结束:超步k的完成时间取决于最慢的worker的完成时间
    - 超步开始:k+1开始
  - 特点:
    - 需要进行多次的迭代
- 异步图运算:
  - 思路:允许不同顶点有不同的更新速度
  - GraphLab:
    - 共享内存,直接访问内存
Graphlite:
- GraphLite图计算框架属于BSP模型：
  - GraphLite github地址 https://github.com/schencoding/GraphLite
  - 图很适合进行分布式并行计算，比如最短路径，PageRank等问题
  - 比较著名的图计算框架有Google的Pregel、CMU的GraphLab、Apache的Giraph等。
- 系统函数：
  - getValue：读
  - mutableValue：修改
  - sendMessageToAllNeighbors()：发送给每个邻居同样的消息
  - getOutEdgeIterator()：发送不同值：
    - 得到OutEdgeIterator，依次访问邻边，sendMessageTo()发送消息
  - voteToHalt()：将当前顶点置为Inactive；之后若收到新消息，该顶点会被重新激活
  - getSuperstep()：获取当前超步数，从0开始计数
- 实现函数：
  - Vertex:顶点、边、发送的消息都是double
  - compute(msg);

程序源码

#include <stdio.h>
#include <string.h>
#include <math.h>

#include <limits>
#include <iostream>
using namespace std;

//#include <array>
//#include <list>

#include "GraphLite.h"

// Token-pastes the "SSSP" prefix onto a role name, e.g.
// VERTEX_CLASS_NAME(Graph) -> SSSPGraph. With an empty argument,
// VERTEX_CLASS_NAME() yields plain SSSP, which is the vertex class.
#define VERTEX_CLASS_NAME(name) SSSP##name

// Small threshold below which a floating-point value is treated as zero.
#define EPS 1e-6

#include<climits>
//#define INF (numeric_limits<int>::max())
// Sentinel distance meaning "not reached yet"; INT_MAX comes from <climits>.
#define INF INT_MAX
// Id of the source vertex v0; assigned from argv[3] in the graph's init()
// and read by the vertex program.
int v0_id; // source_id

/// Input formatter for the SSSP job.
///
/// Reads the vertex/edge counts from their header lines, reports the sizes of
/// the vertex, edge and message values (all double), and loads the edge list.
/// Each edge line has the form "<from> <to> <weight>"; edges with the same
/// <from> are expected on consecutive lines, because a vertex is emitted each
/// time <from> changes.
class VERTEX_CLASS_NAME(InputFormatter): public InputFormatter {
public:
    /// Parses the total vertex count, stores it in m_total_vertex and returns it.
    int64_t getVertexNum() {
        unsigned long long n = 0;  // stays 0 if the line cannot be parsed
        sscanf(m_ptotal_vertex_line, "%llu", &n);
        m_total_vertex= n;
        return m_total_vertex;
    }
    /// Parses the total edge count, stores it in m_total_edge and returns it.
    int64_t getEdgeNum() {
        unsigned long long n = 0;  // stays 0 if the line cannot be parsed
        sscanf(m_ptotal_edge_line, "%llu", &n);
        m_total_edge= n;
        return m_total_edge;
    }
    /// Size in bytes of a vertex value (a double holding the distance).
    int getVertexValueSize() {
        m_n_value_size = sizeof(double);
        return m_n_value_size;
    }
    /// Size in bytes of an edge value (a double holding the weight).
    int getEdgeValueSize() {
        m_e_value_size = sizeof(double);
        return m_e_value_size;
    }
    /// Size in bytes of a message value (a double holding a candidate distance).
    int getMessageValueSize() {
        m_m_value_size = sizeof(double);
        return m_m_value_size;
    }
    /// Reads m_total_edge edge lines, calling addEdge() for every edge and
    /// addVertex() once per run of lines that share the same source id.
    void loadGraph() {
        // Nothing to read: without this guard a non-existent edge line would
        // be fetched and a vertex with an indeterminate id would be added.
        if (m_total_edge <= 0) {
            return;
        }

        unsigned long long last_vertex = 0;
        unsigned long long from = 0;
        unsigned long long to = 0;

        double value = 1;   // placeholder vertex value handed to addVertex()
        int outdegree = 0;  // edges seen so far for last_vertex

        for (int64_t i = 0; i < m_total_edge; ++i) {
            const char *line= getEdgeLine();

            // Reset per line so a line that omits the weight gets 0 instead
            // of silently inheriting the previous edge's weight.
            double weight = 0;
            sscanf(line, "%llu %llu %lf", &from, &to, &weight);

            if (i == 0) {
                last_vertex = from;
            } else if (last_vertex != from) {
                // Source id changed: the previous vertex is complete.
                addVertex(last_vertex, &value, outdegree);
                last_vertex = from;
                outdegree = 0;
            }
            ++outdegree;
            addEdge(from, to, &weight);
        }
        // Flush the final run.
        addVertex(last_vertex, &value, outdegree);
    }
};

/// Output formatter for the SSSP job: writes one "<vertex id>: <distance>"
/// line per vertex, with the distance truncated to an integer.
class VERTEX_CLASS_NAME(OutputFormatter): public OutputFormatter {
public:
    /// Iterates over all results and emits one formatted line for each.
    void writeResult() {
        int64_t vid;
        double value;
        char s[1024];

        for (ResultIterator r_iter; ! r_iter.done(); r_iter.next() ) {
            r_iter.getIdValue(vid, &value);
            // The argument type now matches %lld, and the write is bounded by
            // the buffer size.
            int n = snprintf(s, sizeof(s), "%lld: %d\n",
                             static_cast<long long>(vid), static_cast<int>(value));
            writeNextResLine(s, n);
        }
    }
};

// Sum aggregator over double values: accumulate() adds into the local
// total, merge() adds into the global total.
class VERTEX_CLASS_NAME(Aggregator): public Aggregator<double> {
public:
    // Clears both totals.
    void init() {
        m_local = 0;
        m_global = 0;
    }
    // Address of the global total.
    void* getGlobal() { return &m_global; }
    // Overwrites the global total with the double that p points to.
    void setGlobal(const void* p) {
        const double* incoming = static_cast<const double*>(p);
        m_global = *incoming;
    }
    // Address of the local total.
    void* getLocal() { return &m_local; }
    // Adds the double that p points to onto the global total.
    void merge(const void* p) {
        const double partial = *static_cast<const double*>(p);
        m_global = m_global + partial;
    }
    // Adds the double that p points to onto the local total.
    void accumulate(const void* p) {
        const double contribution = *static_cast<const double*>(p);
        m_local = m_local + contribution;
    }
};

/// SSSP vertex program. The vertex value is the best known distance from the
/// source vertex (file-level v0_id); edge values are weights; each message is
/// a candidate distance offered by an in-neighbour.
class VERTEX_CLASS_NAME(): public Vertex <double, double, double> {
public:
    /// Runs one superstep for this vertex.
    ///
    /// Takes the minimum of the incoming candidate distances (0 for the source
    /// itself). If that improves on the stored value, or this is superstep 0
    /// where the value is initialised, the value is stored and
    /// `distance + edge weight` is sent along every out-edge. The vertex then
    /// votes to halt.
    ///
    /// Differences from the previous version:
    ///  - the "superstep >= 50" early exit is gone: nothing ever accumulated
    ///    into the aggregator it read, so its global value was always below
    ///    EPS and every vertex returned from superstep 50 onwards without
    ///    looking at its messages;
    ///  - distances are kept as double, matching the message type, instead of
    ///    being truncated through an int;
    ///  - vertices whose distance is still INF do not send, so "INF + weight"
    ///    candidates are never produced;
    ///  - target ids are passed straight through instead of via a double;
    ///  - per-update debug printf calls are removed.
    ///
    /// @param pmsgs Iterator over the messages delivered for this superstep.
    void compute(MessageIterator* pmsgs) {
        const bool is_source = (getVertexId() == v0_id);

        // Best candidate seen in this superstep.
        double best = is_source ? 0.0 : INF;
        for ( ; ! pmsgs->done(); pmsgs->next() ) {
            const double candidate = pmsgs->getValue();
            if (candidate < best) {
                best = candidate;
            }
        }

        // Superstep 0 always writes, replacing the placeholder value set at load time.
        if (getSuperstep() == 0 || best < getValue()) {
            * mutableValue() = best;

            // Only a vertex that has actually been reached has anything to offer.
            if (best < INF) {
                OutEdgeIterator out_edge = getOutEdgeIterator();
                for ( ; ! out_edge.done(); out_edge.next() ) {
                    sendMessageTo(out_edge.target(), best + out_edge.getValue());
                }
            }
        }

        voteToHalt();
    }
};

/// Job description for the SSSP run: host layout, input/output paths, the
/// source vertex id (stored in the file-level v0_id) and one registered
/// aggregator.
class VERTEX_CLASS_NAME(Graph): public Graph {
public:
    // Array of one aggregator; allocated in init(), released in term().
    VERTEX_CLASS_NAME(Aggregator)* aggregator;

public:
    // argv[0]: program name as passed by the caller (only echoed in the usage text)
    // argv[1]: <input path>
    // argv[2]: <output path>
    // argv[3]: <source-id>
    /// Configures the hosts, validates and stores the command-line arguments,
    /// and registers the aggregator. Prints a usage line and exits with
    /// status 1 when fewer than three arguments follow argv[0].
    void init(int argc, char* argv[]) {

        setNumHosts(5);
        setHost(0, "localhost", 1411);
        setHost(1, "localhost", 1421);
        setHost(2, "localhost", 1431);
        setHost(3, "localhost", 1441);
        setHost(4, "localhost", 1451);

        // argv[3] is read below, so four entries are required; the old
        // "argc < 3" test let a missing source id reach atoi() as a null pointer.
        if (argc < 4) {
           printf ("Usage: %s <input path> <output path> <source-id>\n", argv[0]);
           exit(1);
        }

        m_pin_path = argv[1];
        m_pout_path = argv[2];

        v0_id = atoi(argv[3]);

        aggregator = new VERTEX_CLASS_NAME(Aggregator)[1];
        regNumAggr(1);
        regAggr(0, &aggregator[0]);
    }

    /// Releases the aggregator array allocated in init().
    void term() {
        delete[] aggregator;
    }
};

/* STOP: do not change the code below. */
// Factory with C linkage: allocates the SSSP graph object and attaches a
// freshly allocated input formatter, output formatter and vertex prototype
// to it. The caller receives ownership; destroy_graph() is the matching
// release function.
// NOTE(review): presumably looked up by name by the GraphLite runtime after
// loading this file as a shared object - confirm against the framework.
extern "C" Graph* create_graph() {
    Graph* pgraph = new VERTEX_CLASS_NAME(Graph);

    pgraph->m_pin_formatter = new VERTEX_CLASS_NAME(InputFormatter);
    pgraph->m_pout_formatter = new VERTEX_CLASS_NAME(OutputFormatter);
    pgraph->m_pver_base = new VERTEX_CLASS_NAME();

    return pgraph;
}

// Counterpart of create_graph(): deletes the vertex prototype, the output
// formatter, the input formatter and finally the graph object itself. Each
// pointer is cast back to the concrete SSSP type it was allocated as before
// being deleted.
extern "C" void destroy_graph(Graph* pobject) {
    delete ( VERTEX_CLASS_NAME()* )(pobject->m_pver_base);
    delete ( VERTEX_CLASS_NAME(OutputFormatter)* )(pobject->m_pout_formatter);
    delete ( VERTEX_CLASS_NAME(InputFormatter)* )(pobject->m_pin_formatter);
    delete ( VERTEX_CLASS_NAME(Graph)* )pobject;
}

a550461053

关注

1
点赞
踩
7

收藏

觉得还不错? 一键收藏
0
评论
大数据系统与大规模数据分析之作业三

大数据系统与大规模数据分析之作业三问题描述SSSP编程同步图计算程序源码大数据系统与大规模数据分析之作业三问题描述作业三：同步图运算编程总体任务实现SSSP的图运算输入：图，v0输出：顶点ID，最短路长度SSSP编程SSSP定义:单源最短路径Single Source Shortest Path给定一个顶点v0，求v0到每个顶点的最短路径伪代码：function
复制链接

扫一扫