最近参与了一个项目,其中有一个需求是如何给用户推荐好友。推荐的办法是寻找该用户的好友的好友(即二度好友),并且这些二度好友不能已经是该用户的直接好友。
由于系统数据量比较大,故采用 MapReduce 的方式去处理。如下是笔者的详细代码:
package hadoop.secondfriend;
import java.io.IOException;
import java.util.HashSet;
import java.util.Set;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;
public class SecondDegreeFriends extends Configured implements Tool {
private static String srcFile = "/home/typery/temp/hadoop_test/deg2friends/src_data";
private static String midDataPath = "/home/typery/temp/hadoop_test/deg2friends/midtemp_data";
private static String destPath = "/home/typery/temp/hadoop_test/deg2friends/dest_data";
private static int reduceTaskNum = 2;
public static void main(String[] args) {
int ret = -1;
try {
ret = ToolRunner.run(new SecondDegreeFriends(), args);
} catch (Exception e) {
e.printStackTrace();
}
System.exit(ret);
}
@Override
public int run(String[] args) throws Exception {
Configuration conf = new Configuration();
FileSystem hdfs = FileSystem.get(conf);
if(hdfs.exists(new Path(midDataPath)))
hdfs.delete(new Path(midDataPath), true);
if(hdfs.exists(new Path(destPath)))
hdfs.delete(new Path(destPath), true);
Job deg1Job = Job.getInstance(conf);
deg1Job.setJobName("DEG1_FIND_FRIENDS");
deg1Job.setJarByClass(this.getClass());
deg1Job.setNumReduceTasks(reduceTaskNum);
deg1Job.setMapperClass(Deg1Mapper.class);
deg1Job.setReducerClass(Deg1Reducer.class);
deg1Job.setInputFormatClass(TextInputFormat.class);
deg1Job.setOutputFormatClass(TextOutputFormat.class);
deg1Job.setMapOutputKeyClass(Text.class);
deg1Job.setMapOutputValueClass(Text.class);
deg1Job.setOutputKeyClass(Text.class);
deg1Job.setOutputValueClass(Text.class);
TextInputFormat.addInputPath(deg1Job, new Path(srcFile));
TextOutputFormat.setOutputPath(deg1Job, new Path(midDataPath));
if (deg1Job.waitForCompletion(true)) {
Job deg2Job = Job.getInstance(conf);
deg2Job.setJarByClass(this.getClass());
deg2Job.setJobName("DEG2_FIND_Friends");
deg2Job.setNumReduceTasks(reduceTaskNum);
deg2Job.setMapperClass(Deg2Mapper.class);
deg2Job.setReducerClass(Deg2Reducer.class);
deg2Job.setInputFormatClass(TextInputFormat.class);
deg2Job.setOutputFormatClass(TextOutputFormat.class);
deg2Job.setMapOutputKeyClass(Text.class);
deg2Job.setMapOutputValueClass(Text.class);
deg2Job.setOutputKeyClass(Text.class);
deg2Job.setOutputValueClass(Text.class);
TextInputFormat.addInputPath(deg2Job, new Path(midDataPath));
TextOutputFormat.setOutputPath(deg2Job, new Path(destPath));
return deg2Job.waitForCompletion(true) ? 0 : -1;
}
return -1;
}
/**
* 计算一度好友的MAP
*
* @author typery
*
*/
public static class Deg1Mapper extends Mapper<Object, Text, Text, Text> {
@Override
public void map(Object k, Text v, Context context) throws IOException,
InterruptedException {
String[] deg1Friends = v.toString().split("\t");
context.write(new Text(deg1Friends[0]), new Text(deg1Friends[1]));
context.write(new Text(deg1Friends[1]), new Text(deg1Friends[0]));
}
}
/**
* 计算一度好友的reduce
*
* @author typery
*
*/
public static class Deg1Reducer extends Reducer<Text, Text, Text, Text> {
@Override
public void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
Set<String> friendSet = new HashSet<>();
for (Text value : values) {
friendSet.add(value.toString());
context.write(key, new Text(value.toString() + "\tdeg1"));
}
for (String friendA : friendSet) {
for (String friendB : friendSet) {
if (!friendA.equals(friendB))
context.write(new Text(friendA), new Text(friendB
+ "\tdeg2"));
}
}
}
}
/**
* 计算二度好友的MAP
*
* @author typery
*
*/
public static class Deg2Mapper extends Mapper<Object, Text, Text, Text> {
@Override
public void map(Object key, Text value, Context context)
throws IOException, InterruptedException {
String friends[] = value.toString().split("\t");
context.write(new Text(friends[0] + "\t" + friends[1]), new Text(
friends[2]));
}
}
/**
* 计算二度好友的REDUCE
*
* @author typery
*
*/
public static class Deg2Reducer extends Reducer<Text, Text, Text, Text> {
@Override
public void reduce(Text key, Iterable<Text> values, Context context)
throws IOException, InterruptedException {
String friendAB = key.toString();
int deg1 = 0, deg2 = 0;
for (Text relations : values) {
if (relations.toString().equals("deg1"))
deg1++;
else
deg2++;
}
if (deg1 == 0 && deg2 > 0)
context.write(new Text(friendAB),
new Text(String.valueOf(deg2)));
}
}
}
在本地调试的样例数据如下(每行表示一对好友关系,两个用户名之间须以制表符 \t 分隔,因为代码按 "\t" 切分每一行):
Lily Lucy
Lily Tom
Lily Tim
Lily Jim
Lily Kate
Kate Tom
Kate Jim
Tim LiLei
Tim Lily
Tim HanMeimei
HanMeimei Kate
HanMeimei LiLei
HanMeimei Lucy
输出结果如下:
用户A 用户B 共有多少个共同好友
Jim Tim 1
Jim Tom 2
Kate LiLei 1
Kate Lucy 2
LiLei Kate 1
LiLei Lucy 1
Lucy Kate 2
Lucy LiLei 1
Tim Jim 1
Tim Tom 1
Tom Jim 2
Tom Tim 1
HanMeimei Jim 1
HanMeimei Lily 3
………………