- 需求:给出A-O个人中每个人的好友列表,求出哪些人两两之间有共同好友,以及他们的共同好友都有谁。
注意:这些人好友都是单向的,可能A是B的好友,但是B不一定是A的好友,这种类似的微博的关注,
A关注B,但是B不一定关注了A。 - 原始文件如下:
- 要求输出的格式如下:
- 思路分析:
⑴从上面我们可以看到,现在我们知道A-O每个人拥有哪些好友,但是我们现在是要找出两两之间的人有哪些共同好友。那么
我们可以逆向思维,第一步找出哪些好友拥有A,哪些好友拥有B.....依次找出,结果如下:
我们可以逆向思维,第一步找出哪些好友拥有A,哪些好友拥有B.....依次找出,结果如下:
⑵得出上面的数据后,我们可以对后面的好友列表进行排序,避免重复,将“拥有这名朋友的所有人”进行两两配对,并将配对后的字符串当做键,“朋友”当做值输出,即输出<人-人,共同朋友>
- 代码实现,通过两次job运算
a:FriendMapper01
package com.kgf.mapreduce.friend;

import java.io.IOException;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Stage-1 mapper: inverts the "person -> friends" relation.
 *
 * <p>Input line format: {@code person:friend1,friend2,...}.
 * Emits {@code <friend, person>} so the stage-1 reducer can collect,
 * for each friend, every person who follows that friend.
 */
public class FriendMapper01 extends Mapper<LongWritable, Text, Text, Text> {

	// Reused across map() calls to avoid one allocation per output record.
	private final Text k = new Text();
	private final Text v = new Text();

	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		// 1: read one input line
		String line = value.toString();
		// 2: split into the person and the comma-separated friend list
		String[] fields = line.split(":");
		if (fields.length < 2 || fields[1].isEmpty()) {
			// Skip blank or malformed lines instead of failing the whole task
			// with an ArrayIndexOutOfBoundsException.
			return;
		}
		String person = fields[0];
		String[] friends = fields[1].split(",");
		// 3: emit the inverted edge <friend, person>
		for (String friend : friends) {
			k.set(friend);
			v.set(person);
			context.write(k, v);
		}
	}
}
b:FriendReducer
package com.kgf.mapreduce.friend;

import java.io.IOException;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Stage-1 reducer: the key is a friend, the values are every person who
 * has that friend in their list. Emits {@code <friend, person1,person2,...>}.
 */
public class FriendReducer extends Reducer<Text, Text, Text, Text> {

	@Override
	protected void reduce(Text key, Iterable<Text> values, Context context)
			throws IOException, InterruptedException {
		// StringBuilder instead of StringBuffer: no concurrent access inside
		// a single reduce() call, so the synchronized variant is wasted cost.
		StringBuilder sb = new StringBuilder();
		// Separator-first join: never needs deleteCharAt(), and is safe even
		// if the value iterator is empty.
		for (Text person : values) {
			if (sb.length() > 0) {
				sb.append(',');
			}
			sb.append(person.toString());
		}
		context.write(key, new Text(sb.toString()));
	}
}
c:FriendDriver01
package com.kgf.mapreduce.friend; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; public class FriendDriver01 { public static void main(String[] args) throws Exception { //1:获取Job对象 Configuration conf = new Configuration(); Job job = Job.getInstance(conf); //2:设置jar job.setJarByClass(FriendDriver01.class); //3:关联Mapper和reducer job.setMapperClass(FriendMapper01.class); job.setReducerClass(FriendReducer.class); //4:设置mapper输出参数 job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); //5:设置最终输出 job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); //6:设置文件输入输出路径 FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); //7:提交 boolean result = job.waitForCompletion(true); System.exit(result?0:1); } }
d:FriengMapper02
package com.kgf.mapreduce.friend;

import java.io.IOException;
import java.util.Arrays;

import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;

/**
 * Stage-2 mapper: consumes job 1's output ({@code friend \t person1,person2,...})
 * and emits every ordered pair of people who share that friend as
 * {@code <personA-personB, friend>}.
 *
 * <p>The person list is sorted first so that the pair "A-B" is always emitted
 * in the same order, never as "B-A" — otherwise the same pair would land on
 * two different reduce keys.
 */
public class FriengMapper02 extends Mapper<LongWritable, Text, Text, Text> {

	// Reused across calls — consistent with FriendMapper01 and avoids
	// allocating two Text objects per emitted pair inside the double loop.
	private final Text pairKey = new Text();
	private final Text friendValue = new Text();

	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		// 1: read one line of job 1's output
		String line = value.toString();
		// 2: split into the shared friend and the list of people who have them
		String[] fields = line.split("\t");
		if (fields.length < 2 || fields[1].isEmpty()) {
			return; // skip blank or malformed lines instead of crashing the task
		}
		String friend = fields[0];
		String[] persons = fields[1].split(",");
		Arrays.sort(persons); // canonical pair order: A-B, never B-A
		friendValue.set(friend);
		// 3: emit every unordered pair (i < j keeps each pair unique)
		for (int i = 0; i < persons.length; i++) {
			for (int j = i + 1; j < persons.length; j++) {
				pairKey.set(persons[i] + "-" + persons[j]);
				context.write(pairKey, friendValue);
			}
		}
	}
}
e:FriendReducer2
package com.kgf.mapreduce.friend;

import java.io.IOException;
import java.util.HashSet;

import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;

/**
 * Stage-2 reducer: the key is a person pair ("A-B"), the values are every
 * friend the pair shares (possibly with duplicates when the same friend
 * appears on multiple map outputs). Emits
 * {@code <A-B, friend1,friend2,...>} with duplicates removed.
 */
public class FriendReducer2 extends Reducer<Text, Text, Text, Text> {

	@Override
	protected void reduce(Text key, Iterable<Text> values, Context context)
			throws IOException, InterruptedException {
		// StringBuilder instead of StringBuffer — single-threaded use.
		StringBuilder sb = new StringBuilder();
		HashSet<String> seen = new HashSet<String>();
		for (Text value : values) {
			String friend = value.toString();
			// Set.add returns false for duplicates — no separate contains() check.
			if (seen.add(friend)) {
				if (sb.length() > 0) {
					sb.append(','); // separator-first join: no trailing comma to delete
				}
				sb.append(friend);
			}
		}
		context.write(key, new Text(sb.toString()));
	}
}
f:FriendDriver2
package com.kgf.mapreduce.friend; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.io.Text; import org.apache.hadoop.mapreduce.Job; import org.apache.hadoop.mapreduce.lib.input.FileInputFormat; import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat; public class FriendDriver2 { public static void main(String[] args) throws Exception { //1:获取Job对象 Configuration conf = new Configuration(); Job job = Job.getInstance(conf); //2:设置jar job.setJarByClass(FriendDriver2.class); //3:关联Mapper和reducer job.setMapperClass(FriengMapper02.class); job.setReducerClass(FriendReducer2.class); //4:设置mapper输出参数 job.setMapOutputKeyClass(Text.class); job.setMapOutputValueClass(Text.class); //5:设置最终输出 job.setOutputKeyClass(Text.class); job.setOutputValueClass(Text.class); //6:设置文件输入输出路径 FileInputFormat.setInputPaths(job, new Path(args[0])); FileOutputFormat.setOutputPath(job, new Path(args[1])); //7:提交 boolean result = job.waitForCompletion(true); System.exit(result?0:1); } }
MapReduce实现寻找共同好友
于 2019-08-11 15:05:54 首次发布