MapReduce 找共同好友。输入数据格式：每行以空白分隔，第一个字段是用户本人，其后是该用户的好友列表，例如 `A B C` 表示 A 的好友是 B 和 C。

最新推荐文章于 2020-11-18 18:47:18 发布

冥想者-定

最新推荐文章于 2020-11-18 18:47:18 发布

阅读量618

点赞数

分类专栏： ********面试 __MapReduce

__MapReduce 同时被 2 个专栏收录

75 篇文章 1 订阅

订阅专栏

********面试

16 篇文章 0 订阅

订阅专栏

import java.io.IOException;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeSet;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

/**
 * MapReduce job that computes the common friends of every pair of people.
 *
 * <p>Input: text lines of whitespace-separated tokens. The first token is a
 * person (the "owner"); the remaining tokens are that person's friends, e.g.
 * {@code A B C} means A has friends B and C.
 *
 * <p>How it works (friendship is assumed to be mutual):
 * <ol>
 *   <li>If A lists B and C, then B has A as a friend and C has A as a friend.</li>
 *   <li>If A lists B and C, and D also lists B and C, then B and C have the
 *       common friends A and D.</li>
 *   <li>The mapper therefore emits every pair taken from one owner's friend
 *       list as the key and the owner as the value, so all owners that list
 *       the same pair meet in a single reduce call.</li>
 *   <li>The reducer joins those owners into one string.</li>
 * </ol>
 */
public class FindFriend {

    /**
     * Separator placed between the two names of a pair key. Names come from a
     * whitespace tokenizer and so can never contain a space, which makes the
     * key unambiguous for names of any length.
     */
    private static final String PAIR_SEPARATOR = " ";

    /** Separator placed between the owners joined by the reducer. */
    private static final String OWNER_SEPARATOR = ":";

    /**
     * Turns one "owner friend1 friend2 ..." line into (friend pair, owner)
     * records.
     */
    public static class ChangeMapper extends Mapper<Object, Text, Text, Text> {

        /**
         * Emits one record per unordered pair of distinct friends found on the line.
         *
         * @param key     input key supplied by the input format; not used
         * @param value   one input line: the owner followed by the owner's friends
         * @param context sink for the (pair, owner) records
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if writing to the context is interrupted
         */
        @Override
        public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
            StringTokenizer itr = new StringTokenizer(value.toString());
            // A blank line has no owner token; skip it instead of letting
            // nextToken() throw NoSuchElementException and fail the task.
            if (!itr.hasMoreTokens()) {
                return;
            }
            // The first token is the owner of this friend list.
            Text owner = new Text(itr.nextToken());

            // A sorted set removes duplicate friends and yields a stable order,
            // so a given pair is always emitted as (smaller, larger) regardless
            // of the order in which the names appear on the line.
            Set<String> friendSet = new TreeSet<String>();
            while (itr.hasMoreTokens()) {
                friendSet.add(itr.nextToken());
            }
            String[] friends = friendSet.toArray(new String[friendSet.size()]);

            for (int i = 0; i < friends.length; i++) {
                for (int j = i + 1; j < friends.length; j++) {
                    // The separator keeps pairs of multi-character names apart:
                    // without it "AB"+"C" and "A"+"BC" would share the key "ABC".
                    String pairKey = friends[i] + PAIR_SEPARATOR + friends[j];
                    context.write(new Text(pairKey), owner);
                }
            }
        }
    }

    /**
     * Joins all values received for a pair key into one colon-separated string.
     * {@code main} registers this class as both combiner and reducer; its input
     * and output types are identical and joining already-joined strings with
     * the same separator yields the same kind of list.
     */
    public static class FindReducer extends Reducer<Text, Text, Text, Text> {

        /**
         * Writes the key together with its values joined by {@code ":"}.
         *
         * @param key     the friend pair
         * @param values  owners (or partially joined owner lists) for the pair
         * @param context sink for the (pair, joined owners) record
         * @throws IOException          if writing to the context fails
         * @throws InterruptedException if writing to the context is interrupted
         */
        @Override
        public void reduce(Text key, Iterable<Text> values,
                Context context) throws IOException, InterruptedException {
            StringBuilder commonFriends = new StringBuilder();
            // Track the first element explicitly rather than comparing the
            // accumulated string to "" by reference.
            boolean first = true;
            for (Text val : values) {
                if (!first) {
                    commonFriends.append(OWNER_SEPARATOR);
                }
                commonFriends.append(val.toString());
                first = false;
            }
            context.write(key, new Text(commonFriends.toString()));
        }
    }

    /**
     * Configures and runs the job. Every argument left after generic-option
     * parsing except the last is an input path; the last is the output path.
     * Exits with status 2 when fewer than two such arguments remain, otherwise
     * with 0 if the job succeeds and 1 if it fails.
     *
     * @param args command-line arguments, including any generic Hadoop options
     * @throws IOException            propagated from job setup or execution
     * @throws InterruptedException   propagated from waiting for job completion
     * @throws ClassNotFoundException propagated from waiting for job completion
     */
    public static void main(String[] args) throws IOException,
            InterruptedException, ClassNotFoundException {

        Configuration conf = new Configuration();
        String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
        if (otherArgs.length < 2) {
            System.err.println("args error");
            System.exit(2);
        }
        // NOTE(review): the job name "word count" looks like a copy-paste
        // leftover; it is kept unchanged here in case external tooling matches
        // on it. The constructor is kept as well because the Hadoop version in
        // use is not visible from this file.
        Job job = new Job(conf, "word count");
        job.setJarByClass(FindFriend.class);
        job.setMapperClass(ChangeMapper.class);
        job.setCombinerClass(FindReducer.class);
        job.setReducerClass(FindReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        for (int i = 0; i < otherArgs.length - 1; ++i) {
            FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
        }
        FileOutputFormat.setOutputPath(job,
                new Path(otherArgs[otherArgs.length - 1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

冥想者-定

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
mapred找共同朋友，数据格式如下

import java.io.IOException;import java.util.Set;import java.util.StringTokenizer;import java.util.TreeSet;import org.apache.hadoop.conf.Configuration;import org.apache.hadoop.fs.Path;i
复制链接

扫一扫

专栏目录