MapReduce之统计和列出大图中的三角形
上一篇博客介绍了该问题的MapReduce解决方案,本文接下来给出完整的实现代码。
输入数据
1 2
2 3
2 4
2 5
3 4
4 5
阶段1:
mapper阶段任务
将每条边按两个方向各输出一次,为生成可能的三角形路径做准备
mapper阶段编码
package com.deng.Graph;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class GraphEdgeMapper extends Mapper<LongWritable, Text,LongWritable,LongWritable> {
LongWritable k=new LongWritable();
LongWritable v=new LongWritable();
public void map(LongWritable key,Text value,Context context) throws IOException, InterruptedException {
String edge=value.toString();
String[] nodes=edge.split(" ");
long nodeA=Long.parseLong(nodes[0]);
long nodeB=Long.parseLong(nodes[1]);
k.set(nodeA);
v.set(nodeB);
context.write(k,v);
context.write(v,k);
}
}
reducer阶段任务
识别可能的三角形
reducer阶段编码
package com.deng.Graph;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
public class GraphEdgeReducer extends Reducer<LongWritable,LongWritable,PairOfLongs,LongWritable> {
PairOfLongs k=new PairOfLongs();
LongWritable v=new LongWritable();
public void reduce(LongWritable key,Iterable<LongWritable> values,Context context) throws IOException, InterruptedException {
List<Long> list=new ArrayList<>();
v.set(0); //0表示不存在
for(LongWritable value:values){
list.add(value.get());
k.set(key.get(),value.get());
context.write(k,v);
}
Collections.sort(list);
v.set(key.get());
for(int i=0;i<list.size()-1;i++){
for(int j=i+1;j<list.size();j++){
k.set(list.get(i),list.get(j));
context.write(k,v);
}
}
}
}
其中PairOfLongs编码如下:
package com.deng.Graph;
import org.apache.hadoop.io.WritableComparable;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
/**
 * A writable, comparable pair of {@code long} values, used as a MapReduce key.
 *
 * <p>Ordering is by {@code left} first, then by {@code right}. {@link #equals},
 * {@link #hashCode} and {@link #compareTo} are mutually consistent, and
 * {@code hashCode} depends only on the two values, so equal keys hash
 * identically in every JVM (required by hash-based partitioning).
 */
public class PairOfLongs implements WritableComparable<PairOfLongs> {

    // Stored as primitives: no null state, and == compares values rather than
    // boxed object identity.
    private long left;
    private long right;

    /** Creates the pair (0, 0); needed so the framework can instantiate the key before readFields. */
    public PairOfLongs() {
    }

    /**
     * @param left  first element, must not be null
     * @param right second element, must not be null
     */
    public PairOfLongs(Long left, Long right) {
        set(left, right);
    }

    /**
     * Replaces both elements of the pair.
     *
     * @param left  first element, must not be null
     * @param right second element, must not be null
     * @throws NullPointerException if either argument is null
     */
    public void set(Long left, Long right) {
        this.left = left;
        this.right = right;
    }

    /** Two pairs are equal when both elements have the same numeric value. */
    @Override
    public boolean equals(Object obj) {
        if (this == obj) {
            return true;
        }
        if (!(obj instanceof PairOfLongs)) {
            return false;
        }
        PairOfLongs other = (PairOfLongs) obj;
        return left == other.left && right == other.right;
    }

    /** Value-based hash, consistent with {@link #equals} and stable across JVMs. */
    @Override
    public int hashCode() {
        int result = (int) (left ^ (left >>> 32));
        return 31 * result + (int) (right ^ (right >>> 32));
    }

    /** @return the first element */
    public Long getLeft() {
        return left;
    }

    /** @return the second element */
    public Long getRight() {
        return right;
    }

    @Override
    public String toString() {
        return "PairOfLongs{" +
                "left=" + left +
                ", right=" + right +
                '}';
    }

    /** Orders by {@code left}, breaking ties by {@code right}. */
    @Override
    public int compareTo(PairOfLongs o) {
        int byLeft = Long.compare(left, o.left);
        return byLeft != 0 ? byLeft : Long.compare(right, o.right);
    }

    /** Serializes the pair as two consecutive longs: left, then right. */
    @Override
    public void write(DataOutput dataOutput) throws IOException {
        dataOutput.writeLong(left);
        dataOutput.writeLong(right);
    }

    /** Reads the pair in the same order {@link #write} produced it. */
    @Override
    public void readFields(DataInput dataInput) throws IOException {
        left = dataInput.readLong();
        right = dataInput.readLong();
    }
}
阶段2:
mapper阶段任务
恒等映射器
mapper阶段编码
package com.deng.Graph;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
public class TriadsMapper extends Mapper<PairOfLongs, LongWritable,PairOfLongs,LongWritable> {
public void map(PairOfLongs key,LongWritable value,Context context) throws IOException, InterruptedException {
context.write(key,value);
}
}
reducer阶段任务
识别三角形(此阶段的输出中同一个三角形会重复出现)
reducer阶段编码
package com.deng.Graph;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class TriadsReducer extends Reducer<PairOfLongs, LongWritable,Text, Text> {
static final Text Empty=new Text("");
public void reduce(PairOfLongs key,Iterable<LongWritable> values,Context context) throws IOException, InterruptedException {
List<Long> list=new ArrayList<>();
boolean haveSeenSpecialNodeZero=false;
for(LongWritable value:values){
long node=value.get();
if(node==0){
haveSeenSpecialNodeZero=true;
}else{
list.add(node);
}
}
if(haveSeenSpecialNodeZero){
if(list.isEmpty()){
return ;
}
Text triangle=new Text();
for(long node:list){
String triangleAsString=key.getLeft()+","+key.getRight()+","+node;
triangle.set(triangleAsString);
context.write(triangle,Empty);
}
}else{
return ;
}
}
}
阶段3:
mapper阶段任务
对三角形的三个节点排序,使同一个三角形得到相同的键
mapper阶段编码
package com.deng.Graph;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
import java.util.Arrays;
public class UniqueTriadsMapper extends Mapper<Text, Text,Text,Text> {
static Text sortedKey=new Text();
public void map(Text key,Text value,Context context) throws IOException, InterruptedException {
String line=key.toString();
String[] nodes=line.split(",");
Arrays.sort(nodes);
sortedKey.set(nodes[0]+","+nodes[1]+","+nodes[2]);
context.write(sortedKey,value);
}
}
reducer阶段任务
生成唯一的三角形
reducer阶段编码
package com.deng.Graph;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class UniqueTriadsReducer extends Reducer<Text, Text,Text,Text> {
public void reduce(Text key,Iterable<Text> values,Context context) throws IOException, InterruptedException {
context.write(key,null);
}
}