Data-Intensive Text Processing with MapReduce - Graph Algorithms

This article discusses how to process graph algorithms in parallel with MapReduce, covering an implementation of Dijkstra-style single-source shortest paths and the computation of PageRank. A custom RecordReader generates the key-value pairs consumed by the MapReduce program on Hadoop. The article summarizes problems that can arise in graph processing and gives the overall programming approach along with code examples.

Graph Algorithms

Parallel Breadth-First Search

Dijkstra's Algorithm
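
Before the parallel formulation, a quick reference point: the sketch below is a minimal sequential Dijkstra over the same adjacency-map representation (node id -> {neighbor id = edge weight}) used by the input data further down. The class and method names are illustrative only, not part of the original code.

import java.util.Collections;
import java.util.Comparator;
import java.util.HashMap;
import java.util.Map;
import java.util.PriorityQueue;

// Illustrative sequential Dijkstra over a node id -> {neighbor id -> weight} map.
public class SequentialDijkstra {

	// Returns the shortest distance from source to every reachable node.
	static Map<Long, Integer> dijkstra(Map<Long, Map<Long, Integer>> graph, long source) {
		Map<Long, Integer> dist = new HashMap<>();
		// queue entries are {node id, tentative distance}, ordered by distance
		PriorityQueue<long[]> queue = new PriorityQueue<>(Comparator.comparingLong((long[] e) -> e[1]));
		dist.put(source, 0);
		queue.add(new long[]{source, 0});
		while (!queue.isEmpty()) {
			long[] entry = queue.poll();
			long node = entry[0];
			int d = (int) entry[1];
			if (d > dist.getOrDefault(node, Integer.MAX_VALUE))
				continue; // stale queue entry, a shorter path was already found
			for (Map.Entry<Long, Integer> edge : graph.getOrDefault(node, Collections.<Long, Integer>emptyMap()).entrySet()) {
				int candidate = d + edge.getValue();
				if (candidate < dist.getOrDefault(edge.getKey(), Integer.MAX_VALUE)) {
					dist.put(edge.getKey(), candidate); // relax the edge
					queue.add(new long[]{edge.getKey(), candidate});
				}
			}
		}
		return dist;
	}

	public static void main(String[] args) {
		Map<Long, Map<Long, Integer>> graph = new HashMap<>();
		Map<Long, Integer> n1 = new HashMap<>();
		n1.put(2L, 6);
		n1.put(3L, 3);
		graph.put(1L, n1);
		Map<Long, Integer> n2 = new HashMap<>();
		n2.put(4L, 9);
		n2.put(5L, 3);
		graph.put(2L, n2);
		System.out.println(dijkstra(graph, 1L)); // {1=0, 2=6, 3=3, 4=15, 5=9}
	}
}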




MapReduce Algorithm:

Main idea: the computation spreads outward from the source like a ripple on water. Each MapReduce iteration extends the search frontier by one hop and computes the shortest distance to every node reachable within that many hops.
When all edge weights are 1, the iteration terminates once increasing the hop count no longer reaches any previously unvisited node.
When edge weights are not all 1, either run a fixed number of iterations equal to the node count minus one (naive), or stop once no node's shortest distance changes between iterations (revised).
In each iteration:
The map input is (node id, [current shortest distance, adjacency list]). The mapper re-emits (node id, [current shortest distance, adjacency list]) for the node itself, and, for every neighbor, emits (neighbor id, distance to that neighbor via the current node).
The reduce input is the map output grouped by node id. Iterating over the values, each "distance via some node" value is a candidate distance for this node, and the minimum of these gives the new current shortest distance; the value of the form [current shortest distance, adjacency list] supplies the adjacency list. The reducer then emits (node id, [new shortest distance, adjacency list]).
A write-up of the idea (in Chinese): http://www.zhizhihu.com/html/y2012/3928.html
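
For instance, on the unit-weight example graph listed under InputData below, with node 1 as the source: iteration 1 reaches nodes 2 and 3 (distance 1); iteration 2 reaches nodes 4, 5, 6, 7, and 10 (distance 2); iteration 3 reaches nodes 8 and 9 (distance 3); iteration 4 changes no distance, so the computation stops.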



Code

InputData:
Each line is: node id <TAB> current shortest distance <TAB> adjacency list as {neighbor id=edge weight, ...}. A distance of 1000 stands for "not yet reached" (effectively infinity); the source node (node 1) starts at 0.

linkWeight = 1
1	0	{2=1, 3=1}
2	1000	{4=1, 5=1}
3	1000	{10=1, 6=1, 7=1}
4	1000	{5=1, 6=1, 3=1}
5	1000	{8=1}
6	1000	{9=1, 8=1, 7=1}
7	1000	{8=1}
8	1000	{9=1}
9	1000	{5=1}
10	1000	{2=1, 9=1, 8=1}

linkWeight != 1
Note the chain 1 -> 11 -> 12 -> ... -> 17 -> 18 alongside the direct edge 1 -> 18 with weight 100: the shortest distance to node 18 (8, via the chain) only settles after the candidate path has been relaxed hop by hop over several iterations, which is why the weighted case needs the stronger termination conditions above.
1	0	{2=6, 3=3, 18=100, 11=1}
2	1000	{4=9, 5=3, 1=4}
3	1000	{10=3, 6=6, 7=1}
4	1000	{5=1, 6=4, 3=3}
5	1000	{8=3, 1=4}
6	1000	{9=4, 8=7, 7=4}
7	1000	{8=2, 6=2}
8	1000	{9=3, 1=3}
9	1000	{5=8}
10	1000	{2=4, 9=6, 8=4}
11	1000	{12=1}
12	1000	{13=1}
13	1000	{14=1}
14	1000	{15=1}
15	1000	{16=1}
16	1000	{17=1}
17	1000	{18=1}
18	1000	{18=0}


Edge data structure; for now it only carries the edge weight, but it can be extended with more per-edge fields (a sketch of such an extension follows the class).
package GraphAlgorithms.PBFS;

import org.apache.hadoop.io.Writable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

/**
 * Per-edge metadata. Currently it only stores the edge weight; it is a Writable
 * so it can be embedded in a node's adjacency list and serialized with it.
 */
public class linkMeta implements Writable {
	private int linkWeight;

	public linkMeta() {
	}

	public linkMeta(int linkWeight) {
		this.linkWeight = linkWeight;
	}

	@Override
	public void write(DataOutput out) throws IOException {
		out.writeInt(linkWeight);
	}

	@Override
	public void readFields(DataInput in) throws IOException {
		linkWeight = in.readInt();
	}

	@Override
	public String toString() {
		return "" + linkWeight;
	}

	public int getLinkWeight() {
		return linkWeight;
	}

	public void setLinkWeight(int linkWeight) {
		this.linkWeight = linkWeight;
	}
}
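
Because linkMeta implements Writable, adding more per-edge information only requires extending write/readFields. A hypothetical extension (the labeledLinkMeta class and its label field are invented purely for illustration):

package GraphAlgorithms.PBFS;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;

// Hypothetical subclass showing how extra per-edge fields could be serialized.
public class labeledLinkMeta extends linkMeta {
	private String label;    // e.g. an edge type; purely illustrative

	public labeledLinkMeta() {
	}

	@Override
	public void write(DataOutput out) throws IOException {
		super.write(out);     // weight first
		out.writeUTF(label);  // then the extra field
	}

	@Override
	public void readFields(DataInput in) throws IOException {
		super.readFields(in);
		label = in.readUTF();
	}
}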

Node data structure, used as the value of the map input; the node flag distinguishes full node records (which carry an adjacency list) from the distance-only messages emitted by the mapper.
package GraphAlgorithms.PBFS;

import org.apache.hadoop.io.WritableComparable;

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import java.util.HashMap;


public class Node implements WritableComparable {

	private boolean node;

	private HashMap<Long, linkMeta> adjacencyList;

	private int currentDistance;

	public Node() {

	}


	@Override
	public boolean equals(Object obj) {
		if (obj instanceof Node) {
			Node that = (Node) obj;
			// only full node records carry an adjacency list, so compare it only for those
			return this.isNode() == that.isNode()
					&& this.getCurrentDistance() == that.getCurrentDistance()
					&& (!this.isNode() || this.getAdjacencyList().equals(that.getAdjacencyList()));
		}
		return false;
	}

	@Override
	public int hashCode() {
		// adjacencyList is null for distance-only messages
		return 31 * currentDistance + (adjacencyList == null ? 0 : adjacencyList.hashCode());
	}

	@Override
	public String toString() {
		if (node)
			return currentDistance + "\t" + adjacencyList.toString();
		else
			return "" + currentDistance;
	}

	@Override
	public int compareTo(Object o) {
		Node that = (Node) o;
		if (this.isNode() == that.isNode())
			return ((Integer) currentDistance).compareTo(that.getCurrentDistance());
		else if (this.isNode()) {
			return 1;
		} else {
			return -1;
		}
	}

	@Override
	public void write(DataOutput out) throws IOException {
		out.writeBoolean(isNode());
		// only full node records serialize the adjacency list
		if (isNode()) {
			out.writeInt(adjacencyList.size());
			for (Long aLong : adjacencyList.keySet()) {
				out.writeLong(aLong);
				adjacencyList.get(aLong).write(out);
			}
		}
		out.writeInt(currentDistance);
	}

	@Override
	public void readFields(DataInput in) throws IOException {
		node = in.readBoolean();
		// only full node records deserialize the adjacency list
		if (isNode()) {
			adjacencyList = new HashMap<Long, linkMeta>();
			int size = in.readInt();
			long key;
			for (int i = 0; i < size; i++) {
				linkMeta linkMeta = new linkMeta();
				key = in.readLong();
				linkMeta.readFields(in);
				adjacencyList.put(key, linkMeta);
			}
		}
		currentDistance = in.readInt();
	}

	public HashMap<Long, linkMeta> getAdjacencyList() {
		return adjacencyList;
	}

	public void setAdjacencyList(HashMap<Long, linkMeta> adjacencyList) {
		this.adjacencyList = adjacencyList;
	}

	public boolean isNode() {
		return node;
	}

	public void setNode(boolean node) {
		this.node = node;
	}

	public int getCurrentDistance() {
		return currentDistance;
	}

	public void setCurrentDistance(int currentDistance) {
		this.currentDistance = currentDistance;
	}

	public void set(Node value) {
		this.node = value.isNode();
		this.adjacencyList = value.getAdjacencyList();
		this.currentDistance = value.getCurrentDistance();
	}
}
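
The mapper below consumes (LongWritable, Node) records directly, which is where the custom RecordReader mentioned in the summary comes in. Below is a minimal sketch of how it could look; the names NodeInputFormat/NodeRecordReader are assumptions, and it simply wraps Hadoop's LineRecordReader and parses the "id <TAB> distance <TAB> {neighbor=weight, ...}" lines shown above.

package GraphAlgorithms.PBFS;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.InputSplit;
import org.apache.hadoop.mapreduce.RecordReader;
import org.apache.hadoop.mapreduce.TaskAttemptContext;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.LineRecordReader;

import java.io.IOException;
import java.util.HashMap;

// Hypothetical input format: turns each "id \t distance \t {n1=w1, n2=w2, ...}" line into (node id, Node).
public class NodeInputFormat extends FileInputFormat<LongWritable, Node> {

	@Override
	public RecordReader<LongWritable, Node> createRecordReader(InputSplit split, TaskAttemptContext context) {
		return new NodeRecordReader();
	}

	public static class NodeRecordReader extends RecordReader<LongWritable, Node> {
		private final LineRecordReader lineReader = new LineRecordReader();
		private final LongWritable key = new LongWritable();
		private final Node value = new Node();

		@Override
		public void initialize(InputSplit split, TaskAttemptContext context) throws IOException {
			lineReader.initialize(split, context);
		}

		@Override
		public boolean nextKeyValue() throws IOException {
			if (!lineReader.nextKeyValue())
				return false;
			// example line: "1	0	{2=6, 3=3}"
			String[] fields = lineReader.getCurrentValue().toString().split("\t");
			key.set(Long.parseLong(fields[0].trim()));
			value.setNode(true);
			value.setCurrentDistance(Integer.parseInt(fields[1].trim()));
			HashMap<Long, linkMeta> adjacency = new HashMap<Long, linkMeta>();
			String list = fields[2].trim();
			list = list.substring(1, list.length() - 1); // strip the surrounding { }
			if (!list.isEmpty()) {
				for (String entry : list.split(",")) {
					String[] pair = entry.trim().split("=");
					adjacency.put(Long.parseLong(pair[0].trim()), new linkMeta(Integer.parseInt(pair[1].trim())));
				}
			}
			value.setAdjacencyList(adjacency);
			return true;
		}

		@Override
		public LongWritable getCurrentKey() { return key; }

		@Override
		public Node getCurrentValue() { return value; }

		@Override
		public float getProgress() throws IOException { return lineReader.getProgress(); }

		@Override
		public void close() throws IOException { lineReader.close(); }
	}
}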

Counters
package GraphAlgorithms.PBFS;

/**
 * Counters that track reached nodes, unreached nodes, and nodes whose shortest
 * distance changed in the current iteration; they let the driver decide when
 * the iteration can stop.
 */
public enum Finished {
	MAXDISTANCE, CHANGED, REACHED
}



Map Phase
package GraphAlgorithms.PBFS;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;

import java.io.IOException;
import java.util.HashMap;

/**
 * Map phase: re-emits each node record unchanged (to preserve the graph
 * structure) and, for every node that has already been reached, emits a
 * candidate distance to each of its neighbors.
 */
public class MyMapper extends Mapper<LongWritable, Node, LongWritable, Node> {
	private final static int MAXDISTANCE = 1000;
	private LongWritable outKey = new LongWritable();
	private Node outValue = new Node();

	@Override
	protected void map(LongWritable key, Node value, Context context) throws IOException, InterruptedException {
		int distance = value.getCurrentDistance();
		value.setNode(true);
		context.write(key, value);
//		System.out.println(key + "\t" + value);
		HashMap<Long, linkMeta> adjacencyList = value.getAdjacencyList();
		for (Long aLong : adjacencyList.keySet()) {
			// once the current node has been reached, emit a candidate path from it to each neighbor
			// (not necessarily the shortest; the minimum is taken in reduce)
			if (distance != MAXDISTANCE && aLong != key.get()) {
				outKey.set(aLong);
				outValue.setNode(false);
				int linkWeight = adjacencyList.get(aLong).getLinkWeight();
				outValue.setCurrentDistance(distance + linkWeight);
				context.write(outKey, outValue);
//				System.out.println("-----" + outKey + "\t" + outValue);
			}

		}
	}
}
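
For example, on the first iteration over the unit-weight input, the record for node 1 (1	0	{2=1, 3=1}) yields three map outputs, while records for nodes still at distance 1000 only re-emit themselves:

1	0	{2=1, 3=1}	(the node record passed through, so the reducer can recover the adjacency list)
2	1	(candidate distance to node 2 via node 1)
3	1	(candidate distance to node 3 via node 1)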

Reduce Phase
package GraphAlgorithms.PBFS;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Reducer;

import java.io.IOException;

/**
 * Reduce phase: for each node, takes the minimum over all candidate distances,
 * recovers the adjacency list from the pass-through node record, updates the
 * counters, and emits the updated (node id, [shortest distance, adjacency list]).
 */
public class MyReducer extends Reducer<LongWritable, Node, LongWritable, Node> {

	private final static int MAXDISTANCE = 1000;
	private Node outValue = new Node();
	private long sourceNode;


	@Override
	protected void setup(Context context) throws IOException, InterruptedException {
		sourceNode = Long.parseLong(context.getConfiguration().get("sourceNode"));
	}

	@Override
	protected void reduce(LongWritable key, Iterable<Node> values, Context context)
			throws IOException, InterruptedException {
		// Completion sketch following the description above; details may differ
		// from the original implementation. Every node id is assumed to appear
		// in the input with its own node record.
		int previousDistance = MAXDISTANCE;
		int minDistance = MAXDISTANCE;
		for (Node value : values) {
			if (value.isNode()) {
				// pass-through record: carries the adjacency list and last iteration's distance
				outValue.set(value);
				previousDistance = value.getCurrentDistance();
			}
			if (value.getCurrentDistance() < minDistance)
				minDistance = value.getCurrentDistance();
		}
		if (key.get() == sourceNode)
			minDistance = 0; // the source node is always at distance 0

		// counters let the driver decide whether another iteration is needed
		if (minDistance < previousDistance)
			context.getCounter(Finished.CHANGED).increment(1);
		if (minDistance < MAXDISTANCE)
			context.getCounter(Finished.REACHED).increment(1);
		else
			context.getCounter(Finished.MAXDISTANCE).increment(1);

		outValue.setCurrentDistance(minDistance);
		context.write(key, outValue);
	}
}
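
Driver (sketch)

To tie the pieces together, a driver along the following lines can run the job iteratively, feeding each iteration's output into the next and stopping once the CHANGED counter drops to zero (for weighted graphs one can additionally cap the number of iterations at the node count minus one, as described above). The class name PBFSDriver, the path handling, and the use of the NodeInputFormat sketched earlier are assumptions, not code from the original post.

package GraphAlgorithms.PBFS;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

// Hypothetical driver: iterates the map/reduce pair until no distance changes.
public class PBFSDriver {
	public static void main(String[] args) throws Exception {
		String input = args[0];      // initial graph, one node per line
		String outputBase = args[1]; // each iteration writes to outputBase + iteration
		long sourceNode = Long.parseLong(args[2]);

		int iteration = 0;
		long changed = 1;
		while (changed > 0) {
			Configuration conf = new Configuration();
			conf.set("sourceNode", String.valueOf(sourceNode)); // read in MyReducer.setup
			Job job = Job.getInstance(conf, "PBFS iteration " + iteration);
			job.setJarByClass(PBFSDriver.class);
			job.setInputFormatClass(NodeInputFormat.class); // parses lines into (id, Node)
			job.setMapperClass(MyMapper.class);
			job.setReducerClass(MyReducer.class);
			job.setOutputKeyClass(LongWritable.class);
			job.setOutputValueClass(Node.class);
			// the text output of one iteration ("id \t distance \t {...}") is the input of the next
			FileInputFormat.addInputPath(job,
					new Path(iteration == 0 ? input : outputBase + (iteration - 1)));
			FileOutputFormat.setOutputPath(job, new Path(outputBase + iteration));
			if (!job.waitForCompletion(true))
				System.exit(1);
			// stop when no node's shortest distance changed in this iteration
			changed = job.getCounters().findCounter(Finished.CHANGED).getValue();
			iteration++;
		}
	}
}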