第一步:用wget从GitHub下载GraphLite到Ubuntu系统
$ wget -r -O GraphLite-master.zip "https://codeload.github.com/schencoding/GraphLite/zip/master"
解压下载的文件到/home/hadoop/GraphLite-0.20
$ unzip GraphLite-master.zip
第二步:配置环境变量
$ sudo vim /etc/profile
#添加以下内容
# GraphLite
export GRAPHLITE_HOME=/home/hadoop/GraphLite-0.20
export LD_LIBRARY_PATH=$JAVA_HOME/jre/lib/amd64:$JAVA_HOME/jre/lib/amd64/server
#并更新环境变量
$ source /etc/profile
第三步:下载相关支持
$ sudo apt-get install protobuf-c-compiler libprotobuf-c0 libprotobuf-c0-dev
$ sudo apt-get install make
$ sudo apt-get install g++
第四步:在GraphLite-0.20目录下的bin/setenv中设置JAVA_HOME、HADOOP_HOME、GRAPHLITE_HOME
$ vim bin/setenv
#添加内容
export JAVA_HOME=/usr/local/java/jdk1.8.0_121
export HADOOP_HOME=/home/hadoop/hadoop-2.7.3
export GRAPHLITE_HOME=/home/hadoop/GraphLite-0.20
export LD_LIBRARY_PATH=$JAVA_HOME/jre/lib/amd64:$JAVA_HOME/jre/lib/amd64/server
第五步:在engine/目录下make工程
$ cd engine
$ make
第六步:在example/目录下make工程
报如下错误
g++ -std=c++0x -g -O2 -I/usr/local/hadoop-2.7.3/include -I/usr/local/563home/GraphLite-0.20/include PageRankVertex.cc -fPIC -shared -o PageRankVertex.so
PageRankVertex.cc:33:23: fatal error: GraphLite.h: 没有那个文件或目录
#include "GraphLite.h"
^
compilation terminated.
make: *** [PageRankVertex.so] 错误 1
找不到GraphLite.h文件:检查后发现是环境变量GRAPHLITE_HOME写错了(见上面编译命令中的 -I/usr/local/563home/GraphLite-0.20/include),改正后重新make。
第七步:cd 到GraphLite-0.20根目录下执行命令
$ start-graphlite example/PageRankVertex.so Input/facebookcombined_4w Output/out
注意:每次新开shell运行example之前,都要先在当前shell中加载环境变量(必须用source方式执行;直接运行 ./bin/setenv 只会在子shell中生效,export的变量不会保留):
$ . bin/setenv
第八步:我对PageRankVertex.cc代码的理解
#include <stdio.h>
#include <string.h>
#include <math.h>
#include "GraphLite.h"
// Builds a class name by token-pasting `name` onto the PageRankVertex prefix,
// e.g. VERTEX_CLASS_NAME(Graph) -> PageRankVertexGraph and
// VERTEX_CLASS_NAME() -> PageRankVertex.
#define VERTEX_CLASS_NAME(name) PageRankVertex##name
// Convergence threshold: compute() votes to halt once the global aggregator
// value (the summed PageRank delta) falls below this.
#define EPS 1e-6
// Input formatter for the PageRank example: reports the graph dimensions and
// the byte sizes of vertex/edge/message values, and loads the graph from the
// input edge lines.
class VERTEX_CLASS_NAME(InputFormatter): public InputFormatter {
public:
    // Parses the total vertex count from m_ptotal_vertex_line, caches it in
    // m_total_vertex and returns it. Yields 0 if the line cannot be parsed.
    int64_t getVertexNum() {
        unsigned long long n = 0;
        // %llu matches the unsigned long long destination.
        sscanf(m_ptotal_vertex_line, "%llu", &n);
        m_total_vertex = n;
        return m_total_vertex;
    }

    // Parses the total edge count from m_ptotal_edge_line, caches it in
    // m_total_edge and returns it. Yields 0 if the line cannot be parsed.
    int64_t getEdgeNum() {
        unsigned long long n = 0;
        sscanf(m_ptotal_edge_line, "%llu", &n);
        m_total_edge = n;
        return m_total_edge;
    }

    // Vertex values are doubles (the PageRank score).
    int getVertexValueSize() {
        m_n_value_size = sizeof(double);
        return m_n_value_size;
    }

    // Edge values are doubles (the edge weight).
    int getEdgeValueSize() {
        m_e_value_size = sizeof(double);
        return m_e_value_size;
    }

    // Message values are doubles.
    int getMessageValueSize() {
        m_m_value_size = sizeof(double);
        return m_m_value_size;
    }

    // Reads m_total_edge "from to" lines, adding every edge with weight 1.0 and
    // one vertex per distinct source id together with its out-degree.
    //
    // Per the original author's note, the input is expected to be sorted by
    // source id within a single file, so a change of `from` marks the end of
    // the previous vertex's edge run and no vertex is added twice.
    void loadGraph() {
        // With no edges there is no vertex to flush; the original fell through
        // and called addVertex() with an uninitialised id.
        if (m_total_edge <= 0) {
            return;
        }

        unsigned long long last_vertex = 0;
        unsigned long long from = 0;
        unsigned long long to = 0;
        int outdegree = 0;     // out-degree of the vertex currently being read
        double value = 0.0;    // initial vertex value
        double weight = 1.0;   // edge weight

        for (int64_t i = 0; i < m_total_edge; i++) {
            const char *line = getEdgeLine();
            // Expected line format: "<from> <to>".
            sscanf(line, "%llu %llu", &from, &to);
            if (i == 0) {
                // First edge: start the first run.
                last_vertex = from;
                outdegree = 1;
            } else if (last_vertex != from) {
                // Source changed: flush the finished vertex, start a new run.
                addVertex(last_vertex, &value, outdegree);
                last_vertex = from;
                outdegree = 1;
            } else {
                outdegree++;
            }
            addEdge(from, to, &weight);
        }
        // Flush the last run.
        addVertex(last_vertex, &value, outdegree);
    }
};
// Output formatter for the PageRank example: writes one "<vertex id>: <value>"
// line per result.
class VERTEX_CLASS_NAME(OutputFormatter): public OutputFormatter {
public:
    // The only method that needs overriding: iterates over all results and
    // emits each as a text line through writeNextResLine().
    void writeResult() {
        int64_t vid;
        double value;
        char s[1024];

        for (ResultIterator r_iter; !r_iter.done(); r_iter.next()) {
            r_iter.getIdValue(vid, &value);
            // The argument type now matches %lld (the original passed an
            // unsigned long long), and snprintf bounds the write to the buffer.
            int n = snprintf(s, sizeof(s), "%lld: %f\n",
                             static_cast<long long>(vid), value);
            if (n < 0) {
                continue;   // formatting error: nothing valid to write
            }
            if (n >= static_cast<int>(sizeof(s))) {
                // snprintf reports the untruncated length; only sizeof(s) - 1
                // characters were actually stored.
                n = static_cast<int>(sizeof(s)) - 1;
            }
            writeNextResLine(s, n);
        }
    }
};
// An aggregator that records a double value to compute a sum.
//
// Original author's understanding of the framework (not verifiable from this
// file alone - confirm against GraphLite):
//   - the master keeps m_global, the sum of the PageRank deltas of all vertices;
//   - each worker keeps m_local, the sum of the deltas of its own vertices;
//   - each vertex contributes its own delta through accumulate().
class VERTEX_CLASS_NAME(Aggregator): public Aggregator<double> {
public:
    // Resets both partial sums to zero.
    // NOTE(review): the author's notes say aggregators are re-created every
    // superstep and the master's lives until program end - confirm.
    void init() {
        m_local = 0;
        m_global = 0;
    }

    // Returns a pointer to the local (per-worker) partial sum.
    void* getLocal() {
        return &m_local;
    }

    // Returns a pointer to the global sum.
    void* getGlobal() {
        return &m_global;
    }

    // Overwrites the global sum with the double that p points to.
    void setGlobal(const void* p) {
        const double* incoming = static_cast<const double*>(p);
        m_global = *incoming;
    }

    // Adds one vertex's delta (the double at p) to the local sum.
    // Per the author's notes this runs on a worker, once per contributing vertex.
    void accumulate(const void* p) {
        const double* delta = static_cast<const double*>(p);
        m_local += *delta;
    }

    // Adds one partial sum (the double at p) to the global sum.
    // Per the author's notes this runs on the master at the end of a superstep.
    void merge(const void* p) {
        const double* partial = static_cast<const double*>(p);
        m_global += *partial;
    }
};
// Graph description for the PageRank job: host layout, input/output paths and
// the single aggregator used to detect convergence.
class VERTEX_CLASS_NAME(Graph): public Graph {
public:
    // Array (of length 1) holding the aggregator registered under id 0.
    // The original author describes it as the master's global aggregator.
    VERTEX_CLASS_NAME(Aggregator)* aggregator;

public:
    // Configures the job.
    //   argv[0]: PageRankVertex.so
    //   argv[1]: <input path>
    //   argv[2]: <output path>
    // Prints a usage line and exits with status 1 when fewer than three
    // arguments are given.
    void init(int argc, char* argv[]) {
        // Five hosts on localhost, ports 1411, 1421, ..., 1451.
        // NOTE(review): presumably host 0 is the master and the rest are
        // workers - confirm against the GraphLite documentation.
        const int host_count = 5;
        const int first_port = 1411;
        const int port_stride = 10;
        setNumHosts(host_count);
        for (int host = 0; host < host_count; ++host) {
            setHost(host, "localhost", first_port + port_stride * host);
        }

        const bool enough_args = (argc >= 3);
        if (!enough_args) {
            printf ("Usage: %s <input path> <output path>\n", argv[0]);
            exit(1);
        }

        m_pin_path = argv[1];
        m_pout_path = argv[2];

        // m_global of the aggregator could be given an initial value here.
        aggregator = new VERTEX_CLASS_NAME(Aggregator)[1];

        regNumAggr(1);                 // number of aggregators
        regAggr(0, &aggregator[0]);    // register it under id 0
    }

    // Releases the aggregator array allocated in init().
    void term() {
        delete[] aggregator;
    }
};
// Vertex program for PageRank; vertex, edge and message values are doubles.
class VERTEX_CLASS_NAME(): public Vertex <double, double, double> {
public:
    // Runs for this vertex in each superstep.
    //   superstep 0 : the rank starts at 1.0;
    //   superstep >0: rank = 0.15 + 0.85 * (sum of incoming messages), and the
    //                 absolute change is fed into aggregator 0;
    //   superstep >=2: if the global aggregator value is below EPS the vertex
    //                 votes to halt and returns without updating or sending.
    // Afterwards the new rank is stored and rank / out-degree is sent to every
    // neighbour.
    void compute(MessageIterator* pmsgs) {
        double rank = 1.0;

        if (getSuperstep() != 0) {
            if (getSuperstep() >= 2) {
                const double total_delta =
                    *static_cast<const double*>(getAggrGlobal(0));
                if (total_delta < EPS) {
                    voteToHalt();
                    return;
                }
            }

            // Sum the contributions received from in-neighbours.
            double incoming = 0;
            while (!pmsgs->done()) {
                incoming += pmsgs->getValue();
                pmsgs->next();
            }

            rank = 0.15 + 0.85 * incoming;   // PageRank update formula

            // Report this vertex's |old - new| to aggregator 0.
            double delta = fabs(getValue() - rank);
            accumulateAggr(0, &delta);
        }

        *mutableValue() = rank;   // store the new vertex value

        const int64_t out_degree = getOutEdgeIterator().size();
        sendMessageToAllNeighbors(rank / out_degree);
    }
};
/* STOP: do not change the code below. */
// Factory with C linkage: allocates the PageRank graph object and attaches a
// freshly allocated input formatter, output formatter and vertex object to it.
// Returns the graph; everything allocated here is released by destroy_graph().
// NOTE(review): presumably looked up by name by the GraphLite runtime after it
// loads the .so - confirm.
extern "C" Graph* create_graph() {
Graph* pgraph = new VERTEX_CLASS_NAME(Graph);
pgraph->m_pin_formatter = new VERTEX_CLASS_NAME(InputFormatter);
pgraph->m_pout_formatter = new VERTEX_CLASS_NAME(OutputFormatter);
pgraph->m_pver_base = new VERTEX_CLASS_NAME();
return pgraph;
}
// Counterpart of create_graph(), also with C linkage: deletes the vertex
// object, the output formatter, the input formatter and finally the graph
// itself. Each pointer is cast to its concrete PageRank type before delete.
extern "C" void destroy_graph(Graph* pobject) {
delete ( VERTEX_CLASS_NAME()* )(pobject->m_pver_base);
delete ( VERTEX_CLASS_NAME(OutputFormatter)* )(pobject->m_pout_formatter);
delete ( VERTEX_CLASS_NAME(InputFormatter)* )(pobject->m_pin_formatter);
delete ( VERTEX_CLASS_NAME(Graph)* )pobject;
}
第九步:补充的一些内容
1、可以将图数据切分为K个输入文件,K等于XXX.cc代码中设置的Worker个数。切分指令如下:
$ hash-partitioner.pl <File Path> <K>
$ hash-partitioner.pl Input/facebookcombined 4
2、C++中将字符串转换成double类型
// Convert a C string to double with sscanf.
// String literals are const, so the pointer must be const char*.
const char *x_str = "34.33333";
double KCore = 0.0;
// "%lf" reads a double into KCore (the original passed the undeclared name
// KCore_K). sscanf returns the number of converted fields: 1 on success.
int matched = sscanf(x_str, "%lf", &KCore);
3、GraphLite程序中include头文件关系说明