1、准备模拟数据
创建文件qqFriend.txt,并上传到hdfs上(/qq/input/qqFriend.txt)。
文件内容如下(每行两个名字,两列之间以制表符 \t 分隔,与 Mapper 中 StringUtils.split(line, '\t') 使用的分隔符一致):
aa bb
cc dd
ee ff
bb mm
ff ww
从上面的好友列表可以看出:
1、aa和mm有共同的好友bb,所以aa和mm可以互相推送好友
2、ee和ww有共同的好友ff,所以ee和ww可以互相推送好友
2、编写MapReduce程序
QQMapper.java:
package com.demo;
import java.io.IOException;
import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Mapper;
public class QQMapper extends Mapper<LongWritable, Text, Text, Text>{
	/**
	 * Turns one tab-separated friendship line ("a\tb") into two map outputs,
	 * (a, b) and (b, a), so that the reducer receives, for each person, the
	 * complete list of that person's direct friends.
	 *
	 * Blank or malformed lines (fewer than two tab-separated fields) are
	 * skipped instead of throwing ArrayIndexOutOfBoundsException, which
	 * would otherwise kill the whole map task — e.g. on a trailing newline.
	 */
	@Override
	protected void map(LongWritable key, Text value, Context context)
			throws IOException, InterruptedException {
		String line = value.toString().trim();
		if (line.isEmpty()) {
			return; // ignore empty lines
		}
		String[] names = StringUtils.split(line, '\t');
		if (names == null || names.length < 2) {
			return; // ignore lines that do not contain two tab-separated names
		}
		// Friendship is symmetric: emit the edge in both directions so each
		// person's reducer group contains all of their friends.
		context.write(new Text(names[0]), new Text(names[1]));
		context.write(new Text(names[1]), new Text(names[0]));
	}
}
QQReducer.java:
package com.demo;
import java.io.IOException;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Set;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Reducer;
public class QQReducer extends Reducer<Text, Text, Text, Text>{
	/**
	 * key    – a person P
	 * values – everyone who is a direct friend of P (as emitted by QQMapper)
	 *
	 * Any two distinct friends of P share P as a common friend, so every
	 * ordered pair of them is written out as a mutual recommendation
	 * (both (x, y) and (y, x) are emitted).
	 */
	@Override
	protected void reduce(Text key, Iterable<Text> values, Context context)
			throws IOException, InterruptedException {
		// Collect the friend list, de-duplicating repeated names.
		Set<String> friends = new HashSet<>();
		for (Text friend : values) {
			friends.add(friend.toString());
		}
		// A single friend yields no pairs — skip the nested loops entirely.
		if (friends.size() <= 1) {
			return;
		}
		for (String first : friends) {
			for (String second : friends) {
				if (!first.equals(second)) {
					context.write(new Text(first), new Text(second));
				}
			}
		}
	}
}
QQRunner.java:
package com.demo;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class QQRunner {
	/**
	 * Configures and submits the friend-recommendation MapReduce job.
	 *
	 * @param args optional: args[0] = HDFS input path, args[1] = HDFS output
	 *             path; defaults to /qq/input and /qq/output when omitted,
	 *             so existing invocations keep working unchanged.
	 */
	public static void main(String[] args) throws Exception{
		// HA HDFS client configuration: the logical nameservice "ns1" is
		// backed by two NameNodes (nn1/nn2) with client-side failover.
		Configuration conf = new Configuration();
		conf.set("fs.defaultFS", "hdfs://ns1");
		conf.set("dfs.nameservices", "ns1");
		conf.set("dfs.ha.namenodes.ns1", "nn1,nn2");
		conf.set("dfs.namenode.rpc-address.ns1.nn1", "hadoop1:9000");
		conf.set("dfs.namenode.rpc-address.ns1.nn2", "hadoop2:9000");
		conf.set("dfs.client.failover.proxy.provider.ns1",
				"org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider");

		// Allow the paths to be overridden on the command line.
		String inputPath = args.length > 0 ? args[0] : "/qq/input";
		String outputPath = args.length > 1 ? args[1] : "/qq/output";

		Job job = Job.getInstance(conf);
		job.setJarByClass(QQRunner.class);
		job.setMapperClass(QQMapper.class);
		job.setReducerClass(QQReducer.class);
		// Both the map and reduce phases emit (Text, Text) pairs.
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(Text.class);
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(Text.class);
		FileInputFormat.setInputPaths(job, new Path(inputPath));
		FileOutputFormat.setOutputPath(job, new Path(outputPath));
		// Exit 0 on success, 1 on failure so shell scripts can detect errors.
		System.exit(job.waitForCompletion(true) ? 0 : 1);
	}
}
运行结果:
mm aa
aa mm
ww ee
ee ww
OK,运行结果符合我们的预期。