1.算法说明
1、直接相连的表示两个人是直接好友关系;
2、两个人有相同的好友表示两个人是间接好友(当然可能两个人同时也是直接好友,如图hello和hive)。
3、好友推荐列表就是按照两个用户的共同好友数量排名。
2.项目内容
mapper
package org.hadoop.friends;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.util.StringUtils;
import java.io.IOException;
public class FriendsRecommendMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
    // Reused output key/value objects (standard Hadoop object-reuse pattern to cut allocations).
    Text mkey = new Text();
    IntWritable mval = new IntWritable();

    /**
     * Emits one record per friend pair found on an input line.
     *
     * Input line format: "user friend1 friend2 ..." (space separated).
     * For each (user, friendI) pair a record with value 0 is emitted (direct friendship).
     * For each (friendI, friendJ) pair a record with value 1 is emitted (the two share
     * "user" as a common friend, i.e. a potential recommendation).
     *
     * @param key     byte offset of the line (unused)
     * @param value   one whitespace-separated adjacency line
     * @param context Hadoop output collector
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Split on the single space character; token 0 is the user, the rest are friends.
        String[] tokens = StringUtils.split(value.toString(), ' ');
        for (int i = 1; i < tokens.length; i++) {
            // Direct friendship: flag 0 so the reducer can discard the pair.
            mval.set(0);
            mkey.set(fof(tokens[0], tokens[i]));
            context.write(mkey, mval);
            // Every pair among this user's friends shares the user as a common friend: flag 1.
            mval.set(1);
            for (int j = i + 1; j < tokens.length; j++) {
                mkey.set(fof(tokens[i], tokens[j]));
                context.write(mkey, mval);
            }
        }
    }

    /**
     * Canonical ordering of a pair so "B:C" and "C:B" collapse to the same reduce key.
     * compareTo > 0 means the first string sorts after the second.
     */
    private static String fof(String a, String b) {
        return a.compareTo(b) > 0 ? b + ":" + a : a + ":" + b;
    }
}
Reducer
package org.hadoop.friends;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
public class FriendsRecommendReduce extends Reducer<Text, IntWritable, Text, IntWritable> {
    // Reused output value object (Hadoop object-reuse pattern).
    IntWritable rval = new IntWritable();

    /**
     * Counts the common friends for one candidate pair.
     *
     * The key is a canonical "A:B" pair; values are 0 for a direct friendship and
     * 1 for each common friend. If any 0 is seen the pair is already directly
     * connected, so nothing is emitted for it. Otherwise the number of 1-flags is
     * the common-friend count used for ranking recommendations.
     */
    @Override
    protected void reduce(Text key, Iterable<IntWritable> vals, Context context)
            throws IOException, InterruptedException {
        int commonFriends = 0;
        for (IntWritable flag : vals) {
            // A 0 flag marks a direct friendship -> drop this pair entirely.
            if (flag.get() == 0) {
                return;
            }
            commonFriends++;
        }
        rval.set(commonFriends);
        context.write(key, rval);
    }
}
Recommend
package org.hadoop.friends;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.hadoop.friends.FriendsRecommendMapper;
import org.hadoop.friends.FriendsRecommendReduce;
import java.io.IOException;
/**
 * Driver for the friends-recommendation MapReduce job.
 *
 * Usage: FriendsRecommend &lt;input path&gt; &lt;output path&gt;
 *
 * Fixes over the original:
 * - explicitly sets the map output value class and the final output key/value
 *   classes (the reducer emits Text/IntWritable; previously the output key class
 *   defaulted to LongWritable),
 * - propagates job success/failure through the process exit code instead of
 *   ignoring the result of waitForCompletion.
 */
public class FriendsRecommend {
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf, "friends-recommend");
        job.setJarByClass(FriendsRecommend.class);

        Path input = new Path(args[0]);
        FileInputFormat.addInputPath(job, input);

        Path output = new Path(args[1]);
        // Delete a stale output directory so reruns don't fail with "output dir exists".
        if (output.getFileSystem(conf).exists(output)) {
            output.getFileSystem(conf).delete(output, true);
        }
        FileOutputFormat.setOutputPath(job, output);

        job.setMapperClass(FriendsRecommendMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);

        job.setReducerClass(FriendsRecommendReduce.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);

        // Exit non-zero on job failure so shell scripts can detect it.
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
package org.hadoop.friends;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.hadoop.friends.FriendsRecommendMapper;
import org.hadoop.friends.FriendsRecommendReduce;
import java.io.IOException;
// NOTE(review): this class is a byte-for-byte duplicate of the FriendsRecommend
// driver already listed earlier in this document. Two identical top-level classes
// in the same package cannot coexist in one project — keep only one copy.
public class FriendsRecommend {
    // Driver entry point: wires mapper/reducer, input/output paths, and runs the job.
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        Job job = Job.getInstance(conf);
        job.setJarByClass(FriendsRecommend.class);
        Path input = new Path(args[0]);
        FileInputFormat.addInputPath(job, input);
        Path output = new Path(args[1]);
        // Delete a pre-existing output directory so reruns don't fail during debugging.
        if (output.getFileSystem(conf).exists(output)) {
            output.getFileSystem(conf).delete(output,true);
        }
        FileOutputFormat.setOutputPath(job, output);
        job.setMapperClass(FriendsRecommendMapper.class);
        job.setMapOutputKeyClass(Text.class);
        // NOTE(review): this sets the job's OUTPUT value class, not the map output
        // value class; the output KEY class is never set and defaults to LongWritable
        // while the reducer writes Text keys — confirm intended configuration.
        job.setOutputValueClass(IntWritable.class);
        job.setReducerClass(FriendsRecommendReduce.class);
        // NOTE(review): the boolean result of waitForCompletion is ignored, so the
        // process exit code does not reflect job failure.
        job.waitForCompletion(true);
    }
}
3.项目实现步骤
1
在maven视图下点击package进行打包
成功获取jar包
2.创建数据存放目录
创建testData,friend目录
并用rz工具上传打包好的jar包和friend.txt测试文件
3.分布式文件系统上传测试数据
(1) 将本地的friend.txt上传到虚拟机
(2)首先确定Hadoop集群是否已经开启
(3)分布式文件系统创建input目录并且input目录上传测试文件friend.txt
(4)执行程序
hadoop jar friends-recommend-1.0-SNAPSHOT.jar org.hadoop.friends.FriendsRecommend /input /output
(5)查看结果
hdfs dfs -cat /output/part-r-00000