Data-Intensive Text Processing with MapReduce - Graph Algorithms

This article discusses how to process graph algorithms in parallel with MapReduce, covering an implementation of Dijkstra-style single-source shortest paths and the computation of PageRank. A custom RecordReader generates the key-value pairs consumed by the MapReduce program on Hadoop. The article summarizes problems that can arise in graph processing and gives the overall programming approach along with code examples.

Graph Algorithms

Parallel Breadth-First Search

Dijkstra's Algorithm
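
Before the parallel formulation, a quick reference point: the sketch below is a minimal sequential Dijkstra over the same adjacency-map representation (node id -> {neighbor id = edge weight}) used by the input data further down. The class and method names are illustrative only, not part of the original code.

import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;

// Illustrative sequential Dijkstra over a node id -> {neighbor id -> weight} map.
public class SequentialDijkstra {

	// Returns the shortest distance from source to every reachable node.
	static Map<Long, Integer> dijkstra(Map<Long, Map<Long, Integer>> graph, long source) {
		Map<Long, Integer> dist = new HashMap<>();
		// queue entries are {node id, tentative distance}, ordered by distance
		PriorityQueue<long[]> queue = new PriorityQueue<>(Comparator.comparingLong((long[] e) -> e[1]));
		dist.put(source, 0);
		queue.add(new long[]{source, 0});
		while (!queue.isEmpty()) {
			long[] entry = queue.poll();
			long node = entry[0];
			int d = (int) entry[1];
			if (d > dist.getOrDefault(node, Integer.MAX_VALUE))
				continue; // stale queue entry, a shorter path was already found
			for (Map.Entry<Long, Integer> edge : graph.getOrDefault(node, Collections.<Long, Integer>emptyMap()).entrySet()) {
				int candidate = d + edge.getValue();
				if (candidate < dist.getOrDefault(edge.getKey(), Integer.MAX_VALUE)) {
					dist.put(edge.getKey(), candidate); // relax the edge
					queue.add(new long[]{edge.getKey(), candidate});
				}
			}
		}
		return dist;
	}

	public static void main(String[] args) {
		Map<Long, Map<Long, Integer>> graph = new HashMap<>();
		Map<Long, Integer> n1 = new HashMap<>();
		n1.put(2L, 6);
		n1.put(3L, 3);
		graph.put(1L, n1);
		Map<Long, Integer> n2 = new HashMap<>();
		n2.put(4L, 9);
		n2.put(5L, 3);
		graph.put(2L, n2);
		System.out.println(dijkstra(graph, 1L)); // {1=0, 2=6, 3=3, 4=15, 5=9}
	}
}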




MapReduce Algorithm:

Main idea: the computation spreads outward from the source like a ripple on water. Each MapReduce iteration extends the search frontier by one hop and computes the shortest distance to every node reachable within that many hops.
When all edge weights are 1, the iteration terminates once increasing the hop count no longer reaches any previously unvisited node.
When edge weights are not all 1, either run a fixed number of iterations equal to the node count minus one (naive), or stop once no node's shortest distance changes between iterations (revised).
In each iteration:
The map input is (node id, [current shortest distance, adjacency list]). The mapper re-emits (node id, [current shortest distance, adjacency list]) for the node itself, and, for every neighbor, emits (neighbor id, distance to that neighbor via the current node).
The reduce input is the map output grouped by node id. Iterating over the values, each "distance via some node" value is a candidate distance for this node, and the minimum of these gives the new current shortest distance; the value of the form [current shortest distance, adjacency list] supplies the adjacency list. The reducer then emits (node id, [new shortest distance, adjacency list]).
A write-up of the idea (in Chinese): http://www.zhizhihu.com/html/y2012/3928.html
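
For instance, on the unit-weight example graph listed under InputData below, with node 1 as the source: iteration 1 reaches nodes 2 and 3 (distance 1); iteration 2 reaches nodes 4, 5, 6, 7, and 10 (distance 2); iteration 3 reaches nodes 8 and 9 (distance 3); iteration 4 changes no distance, so the computation stops.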



Code

InputData:
Each line is: node id <TAB> current shortest distance <TAB> adjacency list as {neighbor id=edge weight, ...}. A distance of 1000 stands for "not yet reached" (effectively infinity); the source node (node 1) starts at 0.

linkWeight = 1
1	0	{2=1, 3=1}
2	1000	{4=1, 5=1}
3	1000	{10=1, 6=1, 7=1}
4	1000	{5=1, 6=1, 3=1}
5	1000	{8=1}
6	1000	{9=1, 8=1, 7=1}
7	1000	{8=1}
8	1000	{9=1}
9	1000	{5=1}
10	1000	{2=1, 9=1, 8=1}

linkWeight != 1
Note the chain 1 -> 11 -> 12 -> ... -> 17 -> 18 alongside the direct edge 1 -> 18 with weight 100: the shortest distance to node 18 (8, via the chain) only settles after the candidate path has been relaxed hop by hop over several iterations, which is why the weighted case needs the stronger termination conditions above.
1	0	{2=6, 3=3, 18=100, 11=1}
2	1000	{4=9, 5=3, 1=4}
3	1000	{10=3, 6=6, 7=1}
4	1000	{5=1, 6=4, 3=3}
5	1000	{8=3, 1=4}
6	1000	{9=4, 8=7, 7=4}
7	1000	{8=2, 6=2}
8	1000	{9=3, 1=3}
9	1000	{5=8}
10	1000	{2=4, 9=6, 8=4}
11	1000	{12=1}
12	1000	{13=1}
13	1000	{14=1}
14	1000	{15=1}
15	1000	{16=1}
16	1000	{17=1}
17	1000	{18=1}
18	1000	{18=0}


Edge data structure; for now it only carries the edge weight, but it can be extended with more per-edge fields (a sketch of such an extension follows the class).
package GraphAlgorithms.PBFS;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Per-edge metadata. Currently it only stores the edge weight; it is a Writable
 * so it can be embedded in a node's adjacency list and serialized with it.
 */
public class linkMeta implements Writable {
	private int linkWeight;

	public linkMeta() {
	}

	public linkMeta(int linkWeight) {
		this.linkWeight = linkWeight;
	}

	@Override
	public void write(DataOutput out) throws IOException {
		out.writeInt(linkWeight);
	}

	@Override
	public void readFields(DataInput in) throws IOException {
		linkWeight = in.readInt();
	}

	@Override
	public String toString() {
		return "" + linkWeight;
	}

	public int getLinkWeight() {
		return linkWeight;
	}

	public void setLinkWeight(int linkWeight) {
		this.linkWeight = linkWeight;
	}
}
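
Because linkMeta implements Writable, adding more per-edge information only requires extending write/readFields. A hypothetical extension (the labeledLinkMeta class and its label field are invented purely for illustration):

package GraphAlgorithms.PBFS;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

// Hypothetical subclass showing how extra per-edge fields could be serialized.
public class labeledLinkMeta extends linkMeta {
	private String label;    // e.g. an edge type; purely illustrative

	public labeledLinkMeta() {
	}

	@Override
	public void write(DataOutput out) throws IOException {
		super.write(out);     // weight first
		out.writeUTF(label);  // then the extra field
	}

	@Override
	public void readFields(DataInput in) throws IOException {
		super.readFields(in);
		label = in.readUTF();
	}
}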

Node data structure, used as the value of the map input; the node flag distinguishes full node records (which carry an adjacency list) from the distance-only messages emitted by the mapper.
package GraphAlgorithms.PBFS;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashMap;


public class Node implements WritableComparable {

	private boolean node;

	private HashMap<Long, linkMeta> adjacencyList;

	private int currentDistance;

	public Node() {

	}


	@Override
	public boolean equals(Object obj) {
		if (obj instanceof Node) {
			Node that = (Node) obj;
			// only full node records carry an adjacency list, so compare it only for those
			return this.isNode() == that.isNode()
					&& this.getCurrentDistance() == that.getCurrentDistance()
					&& (!this.isNode() || this.getAdjacencyList().equals(that.getAdjacencyList()));
		}
		return false;
	}

	@Override
	public int hashCode() {
		// adjacencyList is null for distance-only messages
		return 31 * currentDistance + (adjacencyList == null ? 0 : adjacencyList.hashCode());
	}

	@Override
	public String toString() {
		if (node)
			return currentDistance + "\t" + adjacencyList.toString();
		else
			return "" + currentDistance;
	}

	@Override
	public int compareTo(Object o) {
		Node that = (Node) o;
		if (this.isNode() == that.isNode())
			return ((Integer) currentDistance).compareTo(that.getCurrentDistance());
		else if (this.isNode()) {
			return 1;
		} else {
			return -1;
		}
	}

	@Override
	public void write(DataOutput out) throws IOException {
		out.writeBoolean(isNode());
		// only full node records serialize the adjacency list
		if (isNode()) {
			out.writeInt(adjacencyList.size());
			for (Long aLong : adjacencyList.keySet()) {
				out.writeLong(aLong);
				adjacencyList.get(aLong).write(out);
			}
		}
		out.writeInt(currentDistance);
	}

	@Override
	public void readFields(DataInput in) throws IOException {
		node = in.readBoolean();
		// only full node records deserialize the adjacency list
		if (isNode()) {
			adjacencyList = new HashMap<Long, linkMeta>();
			int size = in.readInt();
			long key;
			for (int i = 0; i < size; i++) {
				linkMeta linkMeta = new linkMeta();
				key = in.readLong();
				linkMeta.readFields(in);
				adjacencyList.put(key, linkMeta);
			}
		}
		currentDistance = in.readInt();
	}

	public HashMap<Long, linkMeta> getAdjacencyList() {
		return adjacencyList;
	}

	public void setAdjacencyList(HashMap<Long, linkMeta> adjacencyList) {
		this.adjacencyList = adjacencyList;
	}

	public boolean isNode() {
		return node;
	}

	public void setNode(boolean node) {
		this.node = node;
	}

	public int getCurrentDistance() {
		return currentDistance;
	}

	public void setCurrentDistance(int currentDistance) {
		this.currentDistance = currentDistance;
	}

	public void set(Node value) {
		this.node = value.isNode();
		this.adjacencyList = value.getAdjacencyList();
		this.currentDistance = value.getCurrentDistance();
	}
}
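
The mapper below consumes (LongWritable, Node) records directly, which is where the custom RecordReader mentioned in the summary comes in. Below is a minimal sketch of how it could look; the names NodeInputFormat/NodeRecordReader are assumptions, and it simply wraps Hadoop's LineRecordReader and parses the "id <TAB> distance <TAB> {neighbor=weight, ...}" lines shown above.

package GraphAlgorithms.PBFS;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;

import java.io.IOException;
import java.util.HashMap;

// Hypothetical input format: turns each "id \t distance \t {n1=w1, n2=w2, ...}" line into (node id, Node).
public class NodeInputFormat extends FileInputFormat<LongWritable, Node> {

	@Override
	public RecordReader<LongWritable, Node> createRecordReader(InputSplit split, TaskAttemptContext context) {
		return new NodeRecordReader();
	}

	public static class NodeRecordReader extends RecordReader<LongWritable, Node> {
		private final LineRecordReader lineReader = new LineRecordReader();
		private final LongWritable key = new LongWritable();
		private final Node value = new Node();

		@Override
		public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
			lineReader.initialize(split, context);
		}

		@Override
		public boolean nextKeyValue() throws IOException {
			if (!lineReader.nextKeyValue())
				return false;
			// example line: "1	0	{2=6, 3=3}"
			String[] fields = lineReader.getCurrentValue().toString().split("\t");
			key.set(Long.parseLong(fields[0].trim()));
			value.setNode(true);
			value.setCurrentDistance(Integer.parseInt(fields[1].trim()));
			HashMap<Long, linkMeta> adjacency = new HashMap<Long, linkMeta>();
			String list = fields[2].trim();
			list = list.substring(1, list.length() - 1); // strip the surrounding { }
			if (!list.isEmpty()) {
				for (String entry : list.split(",")) {
					String[] pair = entry.trim().split("=");
					adjacency.put(Long.parseLong(pair[0].trim()), new linkMeta(Integer.parseInt(pair[1].trim())));
				}
			}
			value.setAdjacencyList(adjacency);
			return true;
		}

		@Override
		public LongWritable getCurrentKey() { return key; }

		@Override
		public Node getCurrentValue() { return value; }

		@Override
		public float getProgress() throws IOException { return lineReader.getProgress(); }

		@Override
		public void close() throws IOException { lineReader.close(); }
	}
}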

Counters
package GraphAlgorithms.PBFS;

/**
 * Counters that track reached nodes, unreached nodes, and nodes whose shortest
 * distance changed in the current iteration; they let the driver decide when
 * the iteration can stop.
 */
public enum Finished {
	MAXDISTANCE, CHANGED, REACHED
}



Map Phase
package GraphAlgorithms.PBFS;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.HashMap;

/**
 * Map phase: re-emits each node record unchanged (to preserve the graph
 * structure) and, for every node that has already been reached, emits a
 * candidate distance to each of its neighbors.
 */
public class MyMapper extends Mapper<LongWritable, Node, LongWritable, Node> {
	private final static int MAXDISTANCE = 1000;
	private LongWritable outKey = new LongWritable();
	private Node outValue = new Node();

	@Override
	protected void map(LongWritable key, Node value, Context context) throws IOException, InterruptedException {
		int distance = value.getCurrentDistance();
		value.setNode(true);
		context.write(key, value);
//		System.out.println(key + "\t" + value);
		HashMap<Long, linkMeta> adjacencyList = value.getAdjacencyList();
		for (Long aLong : adjacencyList.keySet()) {
			// once the current node has been reached, emit a candidate path from it to each neighbor
			// (not necessarily the shortest; the minimum is taken in reduce)
			if (distance != MAXDISTANCE && aLong != key.get()) {
				outKey.set(aLong);
				outValue.setNode(false);
				int linkWeight = adjacencyList.get(aLong).getLinkWeight();
				outValue.setCurrentDistance(distance + linkWeight);
				context.write(outKey, outValue);
//				System.out.println("-----" + outKey + "\t" + outValue);
			}

		}
	}
}
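
For example, on the first iteration over the unit-weight input, the record for node 1 (1	0	{2=1, 3=1}) yields three map outputs, while records for nodes still at distance 1000 only re-emit themselves:

1	0	{2=1, 3=1}	(the node record passed through, so the reducer can recover the adjacency list)
2	1	(candidate distance to node 2 via node 1)
3	1	(candidate distance to node 3 via node 1)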

Reduce Phase
package GraphAlgorithms.PBFS;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reduce phase: for each node, takes the minimum over all candidate distances,
 * recovers the adjacency list from the pass-through node record, updates the
 * counters, and emits the updated (node id, [shortest distance, adjacency list]).
 */
public class MyReducer extends Reducer<LongWritable, Node, LongWritable, Node> {

	private final static int MAXDISTANCE = 1000;
	private Node outValue = new Node();
	private long sourceNode;


	@Override
	protected void setup(Context context) throws IOException, InterruptedException {
		sourceNode = Long.parseLong(context.getConfiguration().get("sourceNode"));
	}

	@Override
	protected void reduce(LongWritable key, Iterable<Node> values, Context context)
			throws IOException, InterruptedException {
		// Completion sketch following the description above; details may differ
		// from the original implementation. Every node id is assumed to appear
		// in the input with its own node record.
		int previousDistance = MAXDISTANCE;
		int minDistance = MAXDISTANCE;
		for (Node value : values) {
			if (value.isNode()) {
				// pass-through record: carries the adjacency list and last iteration's distance
				outValue.set(value);
				previousDistance = value.getCurrentDistance();
			}
			if (value.getCurrentDistance() < minDistance)
				minDistance = value.getCurrentDistance();
		}
		if (key.get() == sourceNode)
			minDistance = 0; // the source node is always at distance 0

		// counters let the driver decide whether another iteration is needed
		if (minDistance < previousDistance)
			context.getCounter(Finished.CHANGED).increment(1);
		if (minDistance < MAXDISTANCE)
			context.getCounter(Finished.REACHED).increment(1);
		else
			context.getCounter(Finished.MAXDISTANCE).increment(1);

		outValue.setCurrentDistance(minDistance);
		context.write(key, outValue);
	}
}
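
Driver (sketch)

To tie the pieces together, a driver along the following lines can run the job iteratively, feeding each iteration's output into the next and stopping once the CHANGED counter drops to zero (for weighted graphs one can additionally cap the number of iterations at the node count minus one, as described above). The class name PBFSDriver, the path handling, and the use of the NodeInputFormat sketched earlier are assumptions, not code from the original post.

package GraphAlgorithms.PBFS;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical driver: iterates the map/reduce pair until no distance changes.
public class PBFSDriver {
	public static void main(String[] args) throws Exception {
		String input = args[0];      // initial graph, one node per line
		String outputBase = args[1]; // each iteration writes to outputBase + iteration
		long sourceNode = Long.parseLong(args[2]);

		int iteration = 0;
		long changed = 1;
		while (changed > 0) {
			Configuration conf = new Configuration();
			conf.set("sourceNode", String.valueOf(sourceNode)); // read in MyReducer.setup
			Job job = Job.getInstance(conf, "PBFS iteration " + iteration);
			job.setJarByClass(PBFSDriver.class);
			job.setInputFormatClass(NodeInputFormat.class); // parses lines into (id, Node)
			job.setMapperClass(MyMapper.class);
			job.setReducerClass(MyReducer.class);
			job.setOutputKeyClass(LongWritable.class);
			job.setOutputValueClass(Node.class);
			// the text output of one iteration ("id \t distance \t {...}") is the input of the next
			FileInputFormat.addInputPath(job,
					new Path(iteration == 0 ? input : outputBase + (iteration - 1)));
			FileOutputFormat.setOutputPath(job, new Path(outputBase + iteration));
			if (!job.waitForCompletion(true))
				System.exit(1);
			// stop when no node's shortest distance changed in this iteration
			changed = job.getCounters().findCounter(Finished.CHANGED).getValue();
			iteration++;
		}
	}
}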