- 需求:给出A-O个人中每个人的好友列表,求出哪些人两两之间有共同好友,以及他们的共同好友都有谁。
注意:这些人好友都是单向的,可能A是B的好友,但是B不一定是A的好友,这种类似的微博的关注,
A关注B,但是B不一定关注了A。 - 原始文件如下:
- 要求输出的格式如下:
- 思路分析:
⑴从上面我们可以看到,现在我们知道A-O每个人拥有哪些好友,但是我们现在是要找出两两之间的人有哪些共同好友。那么
我们可以逆向思维,第一步找出哪些好友拥有A,哪些好友拥有B.....依次找出,结果如下:
我们可以逆向思维,第一步找出哪些好友拥有A,哪些好友拥有B.....依次找出,结果如下:
⑵得出上面的数据后,我们可以对后面的好友列表进行排序,避免重复,将“拥有这名朋友的所有人”进行两两配对,并将配对后的字符串当做键,“朋友”当做值输出,即输出<人-人,共同朋友>
- 代码实现,通过两次job运算
a:FriendMapper01
package com.kgf.mapreduce.friend;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Stage-1 mapper: inverts the "person -> friends" relation.
 *
 * <p>Input line format: {@code person:friend1,friend2,...}.
 * Emits {@code <friend, person>} so the stage-1 reducer can collect,
 * for each friend, every person who follows that friend.
 */
public class FriendMapper01 extends Mapper<LongWritable, Text, Text, Text> {

	// Reused across map() calls to avoid one allocation per output record.
	private final Text k = new Text();
	private final Text v = new Text();

	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		// 1: read one input line
		String line = value.toString();
		// 2: split into the person and the comma-separated friend list
		String[] fields = line.split(":");
		if (fields.length < 2 || fields[1].isEmpty()) {
			// Skip blank or malformed lines instead of failing the whole task
			// with an ArrayIndexOutOfBoundsException.
			return;
		}
		String person = fields[0];
		String[] friends = fields[1].split(",");
		// 3: emit the inverted edge <friend, person>
		for (String friend : friends) {
			k.set(friend);
			v.set(person);
			context.write(k, v);
		}
	}
}
b:FriendReducer
package com.kgf.mapreduce.friend;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Stage-1 reducer: the key is a friend, the values are every person who
 * has that friend in their list. Emits {@code <friend, person1,person2,...>}.
 */
public class FriendReducer extends Reducer<Text, Text, Text, Text> {

	@Override
	protected void reduce(Text key, Iterable<Text> values, Context context)
			throws IOException, InterruptedException {
		// StringBuilder instead of StringBuffer: no concurrent access inside
		// a single reduce() call, so the synchronized variant is wasted cost.
		StringBuilder sb = new StringBuilder();
		// Separator-first join: never needs deleteCharAt(), and is safe even
		// if the value iterator is empty.
		for (Text person : values) {
			if (sb.length() > 0) {
				sb.append(',');
			}
			sb.append(person.toString());
		}
		context.write(key, new Text(sb.toString()));
	}
}
c:FriendDriver01
package com.kgf.mapreduce.friend; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; public class FriendDriver01 { public static void main(String[] args) throws Exception { //1:获取Job对象 Configuration conf = new Configuration(); Job job = Job.getInstance(conf); //2:设置jar job.setJarByClass(FriendDriver01.class); //3:关联Mapper和reducer job.setMapperClass(FriendMapper01.class); job.setReducerClass(FriendReducer.class); //4:设置mapper输出参数 job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); //5:设置最终输出 job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); //6:设置文件输入输出路径 FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); //7:提交 boolean result = job.waitForCompletion(true); System.exit(result?0:1); } }
d:FriengMapper02
package com.kgf.mapreduce.friend;

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Stage-2 mapper: consumes job 1's output ({@code friend \t person1,person2,...})
 * and emits every ordered pair of people who share that friend as
 * {@code <personA-personB, friend>}.
 *
 * <p>The person list is sorted first so that the pair "A-B" is always emitted
 * in the same order, never as "B-A" — otherwise the same pair would land on
 * two different reduce keys.
 */
public class FriengMapper02 extends Mapper<LongWritable, Text, Text, Text> {

	// Reused across calls — consistent with FriendMapper01 and avoids
	// allocating two Text objects per emitted pair inside the double loop.
	private final Text pairKey = new Text();
	private final Text friendValue = new Text();

	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		// 1: read one line of job 1's output
		String line = value.toString();
		// 2: split into the shared friend and the list of people who have them
		String[] fields = line.split("\t");
		if (fields.length < 2 || fields[1].isEmpty()) {
			return; // skip blank or malformed lines instead of crashing the task
		}
		String friend = fields[0];
		String[] persons = fields[1].split(",");
		Arrays.sort(persons); // canonical pair order: A-B, never B-A
		friendValue.set(friend);
		// 3: emit every unordered pair (i < j keeps each pair unique)
		for (int i = 0; i < persons.length; i++) {
			for (int j = i + 1; j < persons.length; j++) {
				pairKey.set(persons[i] + "-" + persons[j]);
				context.write(pairKey, friendValue);
			}
		}
	}
}
e:FriendReducer2
package com.kgf.mapreduce.friend;

import java.io.IOException;
import java.util.HashSet;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Stage-2 reducer: the key is a person pair ("A-B"), the values are every
 * friend the pair shares (possibly with duplicates when the same friend
 * appears on multiple map outputs). Emits
 * {@code <A-B, friend1,friend2,...>} with duplicates removed.
 */
public class FriendReducer2 extends Reducer<Text, Text, Text, Text> {

	@Override
	protected void reduce(Text key, Iterable<Text> values, Context context)
			throws IOException, InterruptedException {
		// StringBuilder instead of StringBuffer — single-threaded use.
		StringBuilder sb = new StringBuilder();
		HashSet<String> seen = new HashSet<String>();
		for (Text value : values) {
			String friend = value.toString();
			// Set.add returns false for duplicates — no separate contains() check.
			if (seen.add(friend)) {
				if (sb.length() > 0) {
					sb.append(','); // separator-first join: no trailing comma to delete
				}
				sb.append(friend);
			}
		}
		context.write(key, new Text(sb.toString()));
	}
}
f:FriendDriver2
package com.kgf.mapreduce.friend; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; public class FriendDriver2 { public static void main(String[] args) throws Exception { //1:获取Job对象 Configuration conf = new Configuration(); Job job = Job.getInstance(conf); //2:设置jar job.setJarByClass(FriendDriver2.class); //3:关联Mapper和reducer job.setMapperClass(FriengMapper02.class); job.setReducerClass(FriendReducer2.class); //4:设置mapper输出参数 job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); //5:设置最终输出 job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); //6:设置文件输入输出路径 FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); //7:提交 boolean result = job.waitForCompletion(true); System.exit(result?0:1); } }
MapReduce实现寻找共同好友
于 2019-08-11 15:05:54 首次发布