order.txt
order011 u001
order012 u001
order033 u005
order034 u002
order055 u003
order066 u004
order077 u010
user.txt
u001,hangge,18,male,angelababy
u002,huihui,58,female,ruhua
u003,guanyu,16,male,chunge
u004,laoduan,38,male,angelababy
u005,nana,24,femal,huangbo
u006,xingge,18,male,laoduan
最终结果
u001,hangge,18,male,angelababy,order012
u001,hangge,18,male,angelababy,order011
u002,huihui,58,female,ruhua,order034
u003,guanyu,16,male,chunge,order055
u004,laoduan,38,male,angelababy,order066
u005,nana,24,femal,huangbo,order033
null,order077
需求分析
代码实现
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.NullWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.FileSplit;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
public class JoinDemo {
private static class JoinMapper extends Mapper<LongWritable, Text,Text,Text>{
private String fileName;
@Override
protected void setup(Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException {
//获取文件名
FileSplit f = (FileSplit)context.getInputSplit();
Path path = f.getPath();
fileName = path.getName();
}
Text t = new Text();
@Override
protected void map(LongWritable key, Text value, Mapper<LongWritable, Text, Text, Text>.Context context) throws IOException, InterruptedException {
//如果是user文件
if (fileName.startsWith("user")){
String uid = value.toString().split(",")[0];
t.set(uid);
}else{//如果是order文件
String uid = value.toString().split("\\s+")[1];
t.set(uid);
}
context.write(t,value);
}
}
private static class JoinReducer extends Reducer<Text,Text,Text, NullWritable>{
/*
key: 001
values:{order011 u001,order012 u001,u001,hangge,18,male,angelababy}
*/
Text k3 = new Text();
@Override
protected void reduce(Text key, Iterable<Text> values, Reducer<Text, Text, Text, NullWritable>.Context context) throws IOException, InterruptedException {
String user = null;
List<String> orderList = new ArrayList<>();
for (Text value : values) {
if(value.toString().contains(",")){
user = value.toString();
}else{
orderList.add(value.toString().split("\\s+")[0]);
}
}
//遍历集合 拼接字符串
for (String s : orderList) {
k3.set(user+","+s);
context.write(k3,NullWritable.get());
}
}
}
public static void main(String[] args) throws IOException, InterruptedException, ClassNotFoundException {
Configuration conf = new Configuration();
//创建任务
Job job = Job.getInstance(conf, "movie2");
//设置Mapper类
job.setMapperClass(JoinMapper.class);
//设置Reduce类
job.setReducerClass(JoinReducer.class);
//设置map的输出类型
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(Text.class);
//设置reduce的输出类型
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(NullWritable.class);
//设置输入文件位置
FileInputFormat.setInputPaths(job,new Path("d:\\work\\abc\\input"));
//设置输出文件位置
FileOutputFormat.setOutputPath(job,new Path("d:\\work\\abc\\out_res"));
//将任务提交 并等待完成
job.waitForCompletion(true);
}
}