目录
1.项目说明
- 互为推荐关系
- 非好友的两个人之间存在相同好友则互为推荐关系
- 朋友圈两个非好友的人,存在共同好友人数越多,越值得推荐
- 存在一个共同好友,值为1;存在多个值累加
2.需求
- 程序要求,给每个人推荐可能认识的人
- 互为推荐关系值越高,越值得推荐
- 每个用户,推荐值越高的可能认识的人排在前面
数据
/*
xiaoming laowang renhua linzhiling
laowang xiaoming fengjie
renhua xiaoming ligang fengjie
linzhiling xiaoming ligang fengjie guomeimei
ligang renhua fengjie linzhiling
guomeimei fengjie linzhiling
fengjie renhua laowang linzhiling guomeimei
*/
3.流程
启动虚拟机-->代码实现-->启动集群(yarn)-->
3.1代码实现
在项目的src/main/java
目录下创建文件RecommendFriendJob
import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
@Slf4j
public class RecommendFriendJob {
/**
*
* @param args 0|profile;1|input;2|output;3|master-ip;4|operator;5|homeDir
* @throws Exception
*/
public static void main(String[] args) throws Exception {
Configuration configuration = new Configuration();
Job job = Job.getInstance(configuration);
job.setJarByClass(RecommendFriendJob.class);
job.setJobName("recommendFriend");
Path inputPath = new Path("/input/friend.txt");
FileInputFormat.addInputPath(job, inputPath);
Path outputPath = new Path("/output");
if(outputPath.getFileSystem(configuration).exists(outputPath)) {
outputPath.getFileSystem(configuration).delete(outputPath, true);
}
FileOutputFormat.setOutputPath(job, outputPath);
job.setMapperClass(ReconmentFriendMapper.class);
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
job.setNumReduceTasks(1);
job.setReducerClass(RecommendFriendReducer.class);
boolean isSuccess = job.waitForCompletion(true);
log.info("isSuccess:" + isSuccess);
System.exit(isSuccess ? 0 : 1);
}
}
在项目的src/main/java
目录下创建文件 RecommendFriendReducer
import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
import java.io.IOException;
@Slf4j
public class RecommendFriendReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
@Override
public void reduce(Text key, Iterable<IntWritable> values, Context context) throws IOException, InterruptedException {
log.info("key:" + key);
int sum = 0;
boolean isDirectFriend = false;
for(IntWritable value : values) {
if(value.get() == 0) {
// 直接好友
log.info("direct friend");
isDirectFriend = true;
break;
}
sum = sum + value.get();
}
if(!isDirectFriend) {
context.write(key, new IntWritable(sum));
}
}
}
在项目的src/main/java
目录下创建文件ReconmendFriendMapper
import lombok.extern.slf4j.Slf4j;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
import java.io.IOException;
@Slf4j
public class ReconmendFriendMapper extends Mapper<Object, Text, Text, IntWritable> {
/**
*
* map的key为好友名,value为0|直接好友;1|可能是间接好友,需要在reduce中进行进一步处理
*
*/
@Override
public void map(Object key, Text value, Context context) throws IOException, InterruptedException {
log.info("key:" + key + ",value:" + value);
String[] friends = value.toString().split(" ");
for(int i=0; i < friends.length; i++) {
String self = friends[i];
for(int j=i+1; j < friends.length; j++) {
log.info("i:" + i + ",j:" + j);
if(i == 0) {
// 直接好友
String directFriend = friends[j];
Text directFriendKey = new Text(sort(self, directFriend));
log.info("direct:" + directFriendKey.toString());
context.write(directFriendKey, new IntWritable(0));
} else {
// 可能是间接好友
String indirectFriend = friends[j];
Text indirectFriendKey = new Text(sort(self, indirectFriend));
log.info("indirect:" + indirectFriendKey.toString());
context.write(indirectFriendKey, new IntWritable(1));
}
}
}
}
private String sort(String self, String directFriend) {
if(self.compareToIgnoreCase(directFriend) < 0) {
return directFriend + " " + self;
}
return self + " " + directFriend;
}
}
3.2分布式文件系统上传并测试数据
打包
注:打包前一定注释掉主文件夹下其他程序
上传程序
- 启动集群
注:此处必须开启yarn集群
- 分布式文件系统创建input目录并且input目录上传测试文件friend.txt
- 执行程序