import java.io.IOException;
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class FindFriend {
/**
 * Mapper of the common-friends job.
 *
 * <p>Each input line is split on whitespace. The first token is taken as the owner of the
 * line and every following token as one of the owner's friends. For every pair of distinct
 * friends on the line the mapper emits {@code (pairKey, owner)}.
 */
public static class ChangeMapper extends Mapper<Object, Text, Text, Text> {
    /**
     * Emits one record per pair of friends found on the line.
     *
     * @param key     input key supplied by the input format; not used here
     * @param value   one line of text: owner followed by the owner's friends
     * @param context sink for the emitted {@code (pairKey, owner)} records
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        // Split the current line into whitespace-separated tokens.
        StringTokenizer itr = new StringTokenizer(value.toString());
        // A blank line has no owner token; skip it instead of letting nextToken() throw.
        if (!itr.hasMoreTokens()) {
            return;
        }
        // The first token is the owner of this friend list (for example "A").
        Text owner = new Text(itr.nextToken());
        // A TreeSet removes duplicate friends and keeps them sorted, so a given pair is always
        // concatenated in the same order no matter how the line listed the two friends.
        Set<String> set = new TreeSet<String>();
        while (itr.hasMoreTokens()) {
            set.add(itr.nextToken());
        }
        String[] friends = set.toArray(new String[0]);
        // Emit every unordered pair (i < j) of friends with the owner as the value.
        for (int i = 0; i < friends.length; i++) {
            for (int j = i + 1; j < friends.length; j++) {
                // NOTE(review): the two names are joined without a separator, so names of
                // different lengths can collide (e.g. "AB"+"C" and "A"+"BC"). Kept as-is to
                // preserve the existing output format.
                String outputkey = friends[i] + friends[j];
                context.write(new Text(outputkey), owner);
            }
        }
    }
}
public static class FindReducer extends Reducer<Text, Text, Text, Text> {
public void reduce(Text key, Iterable<Text> values,
Context context) throws IOException, InterruptedException {
String commonfriends = "";
for (Text val : values) {
if (commonfriends == "") {
commonfriends = val.toString();
} else {
commonfriends = commonfriends + ":" + val.toString();
}
}
context.write(key, new Text(commonfriends));
}
}
/**
 * Configures and runs the job.
 *
 * <p>After generic Hadoop options are stripped, every remaining argument except the last is
 * added as an input path and the last one is used as the output path. The JVM exits with
 * status 0 when the job succeeds, 1 when it fails, and 2 when fewer than two paths are given.
 *
 * @param args command-line arguments: generic options, input path(s), output path
 * @throws IOException            if the job cannot be set up or submitted
 * @throws InterruptedException   if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
public static void main(String[] args) throws IOException,
        InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("args error");
        System.exit(2);
    }
    // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
    // NOTE(review): the job name "word count" looks copied from the WordCount example;
    // kept unchanged in case anything keys on it.
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(FindFriend.class);
    job.setMapperClass(ChangeMapper.class);
    // The reducer is also used as the combiner.
    job.setCombinerClass(FindReducer.class);
    job.setReducerClass(FindReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // Every argument but the last is an input path.
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    // The last argument is the output path.
    FileOutputFormat.setOutputPath(job,
            new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
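/**
 * How the algorithm works:
 * 1. Friendship is mutual: if A lists B and C as friends, then B also has A, and C also has A.
 * 2. If A lists B and C, and D also lists B and C, then B and C have A and D as common friends,
 *    because B has both A and D, and C has both A and D.
 * 3. So for each person we pair up that person's friends and use each pair as the key; records
 *    emitted for the same pair are then grouped together.
 * 4. The values grouped under each pair are then merged into one result.
 * 5. The key point is to get the direction of the friend relationship straight.
 */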
}
import java.util.Set;
import java.util.StringTokenizer;
import java.util.TreeSet;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.Mapper.Context;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;
public class FindFriend {
/**
 * Mapper of the common-friends job.
 *
 * <p>Each input line is split on whitespace. The first token is taken as the owner of the
 * line and every following token as one of the owner's friends. For every pair of distinct
 * friends on the line the mapper emits {@code (pairKey, owner)}.
 */
public static class ChangeMapper extends Mapper<Object, Text, Text, Text> {
    /**
     * Emits one record per pair of friends found on the line.
     *
     * @param key     input key supplied by the input format; not used here
     * @param value   one line of text: owner followed by the owner's friends
     * @param context sink for the emitted {@code (pairKey, owner)} records
     * @throws IOException          if writing to the context fails
     * @throws InterruptedException if writing to the context is interrupted
     */
    @Override
    public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
        // Split the current line into whitespace-separated tokens.
        StringTokenizer itr = new StringTokenizer(value.toString());
        // A blank line has no owner token; skip it instead of letting nextToken() throw.
        if (!itr.hasMoreTokens()) {
            return;
        }
        // The first token is the owner of this friend list (for example "A").
        Text owner = new Text(itr.nextToken());
        // A TreeSet removes duplicate friends and keeps them sorted, so a given pair is always
        // concatenated in the same order no matter how the line listed the two friends.
        Set<String> set = new TreeSet<String>();
        while (itr.hasMoreTokens()) {
            set.add(itr.nextToken());
        }
        String[] friends = set.toArray(new String[0]);
        // Emit every unordered pair (i < j) of friends with the owner as the value.
        for (int i = 0; i < friends.length; i++) {
            for (int j = i + 1; j < friends.length; j++) {
                // NOTE(review): the two names are joined without a separator, so names of
                // different lengths can collide (e.g. "AB"+"C" and "A"+"BC"). Kept as-is to
                // preserve the existing output format.
                String outputkey = friends[i] + friends[j];
                context.write(new Text(outputkey), owner);
            }
        }
    }
}
public static class FindReducer extends Reducer<Text, Text, Text, Text> {
public void reduce(Text key, Iterable<Text> values,
Context context) throws IOException, InterruptedException {
String commonfriends = "";
for (Text val : values) {
if (commonfriends == "") {
commonfriends = val.toString();
} else {
commonfriends = commonfriends + ":" + val.toString();
}
}
context.write(key, new Text(commonfriends));
}
}
/**
 * Configures and runs the job.
 *
 * <p>After generic Hadoop options are stripped, every remaining argument except the last is
 * added as an input path and the last one is used as the output path. The JVM exits with
 * status 0 when the job succeeds, 1 when it fails, and 2 when fewer than two paths are given.
 *
 * @param args command-line arguments: generic options, input path(s), output path
 * @throws IOException            if the job cannot be set up or submitted
 * @throws InterruptedException   if waiting for the job is interrupted
 * @throws ClassNotFoundException if a job class cannot be resolved
 */
public static void main(String[] args) throws IOException,
        InterruptedException, ClassNotFoundException {
    Configuration conf = new Configuration();
    String[] otherArgs = new GenericOptionsParser(conf, args).getRemainingArgs();
    if (otherArgs.length < 2) {
        System.err.println("args error");
        System.exit(2);
    }
    // Job.getInstance replaces the deprecated Job(Configuration, String) constructor.
    // NOTE(review): the job name "word count" looks copied from the WordCount example;
    // kept unchanged in case anything keys on it.
    Job job = Job.getInstance(conf, "word count");
    job.setJarByClass(FindFriend.class);
    job.setMapperClass(ChangeMapper.class);
    // The reducer is also used as the combiner.
    job.setCombinerClass(FindReducer.class);
    job.setReducerClass(FindReducer.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);
    // Every argument but the last is an input path.
    for (int i = 0; i < otherArgs.length - 1; ++i) {
        FileInputFormat.addInputPath(job, new Path(otherArgs[i]));
    }
    // The last argument is the output path.
    FileOutputFormat.setOutputPath(job,
            new Path(otherArgs[otherArgs.length - 1]));
    System.exit(job.waitForCompletion(true) ? 0 : 1);
}
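/**
 * How the algorithm works:
 * 1. Friendship is mutual: if A lists B and C as friends, then B also has A, and C also has A.
 * 2. If A lists B and C, and D also lists B and C, then B and C have A and D as common friends,
 *    because B has both A and D, and C has both A and D.
 * 3. So for each person we pair up that person's friends and use each pair as the key; records
 *    emitted for the same pair are then grouped together.
 * 4. The values grouped under each pair are then merged into one result.
 * 5. The key point is to get the direction of the friend relationship straight.
 */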
}