作业:
singlejoin.txt(字段之间以制表符 \t 分隔,首行为表头):
child parent
Tom Lucy
Tom Jack
Jone Lucy
Jone Jack
Lucy Marry
Lucy Ben
Jack Alice
Jack Jesse
Terry Alice
Terry Jesse
Philip Terry
Philip Alma
Mark Terry
Mark Alma
static {
System.setProperty("hadoop.home.dir","E:/x3/hadoop-2.9.2");
}
//map
/**
 * Map phase of the single-table self join.
 *
 * <p>Each input line is split on a tab into a {@code child} field and a {@code parent} field.
 * For every such pair two tagged records are emitted, so that the reducer receives, under one
 * person's key, both that person's parents and that person's children:
 * <ul>
 *   <li>{@code (child,  "1:" + parent)} - tag {@code 1}: the value is a parent of the key</li>
 *   <li>{@code (parent, "0:" + child)}  - tag {@code 0}: the value is a child of the key</li>
 * </ul>
 *
 * <p>Lines that do not contain at least two tab-separated fields are skipped and counted in
 * the {@code SingleJoin / MALFORMED_LINES} job counter instead of failing the task.
 */
public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {

    /**
     * Emits the two tagged records for one input line.
     *
     * @param key     input key; the record at key {@code 0} is treated as the header line
     *                (with the default text input format this is the byte offset of the line)
     * @param value   the raw input line
     * @param context Hadoop context used to emit records and update counters
     * @throws IOException          if writing a record fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void map(LongWritable key, Text value, Context context)
            throws IOException, InterruptedException {
        // Skip the header row ("child parent") before doing any parsing work.
        if (key.get() == 0) {
            return;
        }

        String[] fields = value.toString().split("\t");
        // A blank line or a line without a tab yields fewer than two fields; indexing
        // fields[1] would throw and fail the whole task, so skip it and make it visible.
        if (fields.length < 2) {
            context.getCounter("SingleJoin", "MALFORMED_LINES").increment(1);
            return;
        }

        String child = fields[0];
        String parent = fields[1];

        // Tag 1: the value is a parent of the key. Tag 0: the value is a child of the key.
        context.write(new Text(child), new Text("1:" + parent));
        context.write(new Text(parent), new Text("0:" + child));
    }
}
//reduce
/**
 * Reduce phase of the single-table self join.
 *
 * <p>For one person (the key) the incoming values are tagged names of the form
 * {@code "<tag>:<name>"}. Values tagged {@code 1} are collected as the key's parents; all
 * other values are collected as the key's children. Every (parent, child) combination is
 * then written as {@code key -> "parent : child"}.
 */
public static class MyReduce extends Reducer<Text, Text, Text, Text> {

    /**
     * Joins the parents and the children of {@code key} and emits their cross product.
     *
     * @param key     the person both lists relate to
     * @param values  tagged names: {@code "1:<name>"} for a parent of the key, any other tag
     *                for a child of the key
     * @param context Hadoop context used to emit the joined records
     * @throws IOException          if writing a record fails
     * @throws InterruptedException if the task is interrupted while writing
     */
    @Override
    protected void reduce(Text key, Iterable<Text> values, Context context)
            throws IOException, InterruptedException {
        ArrayList<String> parentList = new ArrayList<>();
        ArrayList<String> childList = new ArrayList<>();

        for (Text value : values) {
            // Limit 2: split only at the first ':' so a name that itself contains ':' stays
            // intact, and an empty name ("1:") still yields two fields instead of one.
            String[] tagged = value.toString().split(":", 2);
            if (tagged.length < 2) {
                // No tag separator at all; nothing sensible to join on.
                continue;
            }
            if ("1".equals(tagged[0])) {
                parentList.add(tagged[1]);
            } else {
                childList.add(tagged[1]);
            }
        }

        // Cross product: each parent of the key paired with each child of the key.
        // If either list is empty, nothing is written for this key.
        for (String parent : parentList) {
            for (String child : childList) {
                context.write(key, new Text(parent + " : " + child));
            }
        }
    }
}
/**
 * Configures and runs the single-join MapReduce job.
 *
 * <p>Any existing output directory is deleted recursively before the job is submitted.
 * After the job finishes, {@code 1} is printed on success and {@code 0} on failure; on
 * failure the JVM additionally exits with status 1.
 *
 * @param args {@code args[0]}: input path(s), passed to {@code FileInputFormat.addInputPaths};
 *             {@code args[1]}: output path
 * @throws IOException            if the file system or job submission fails
 * @throws ClassNotFoundException if a job class cannot be found
 * @throws InterruptedException   if waiting for the job is interrupted
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
    // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
    if (args.length < 2) {
        System.err.println("Usage: SingleJoin <input path[,input path...]> <output path>");
        System.exit(2);
    }

    // 0. Create the job.
    Configuration conf = new Configuration();
    Job job = Job.getInstance(conf, "single_join");
    job.setJarByClass(SingleJoin.class);

    // 1. Input file(s).
    FileInputFormat.addInputPaths(job, args[0]);

    // 2. Map phase.
    job.setMapperClass(MyMapper.class);

    // 3. Shuffle is handled by the framework.

    // 4. Reduce phase.
    job.setReducerClass(MyReduce.class);
    job.setOutputKeyClass(Text.class);
    job.setOutputValueClass(Text.class);

    // 5. Output directory.
    Path outputPath = new Path(args[1]);
    FileOutputFormat.setOutputPath(job, outputPath);

    // Resolve the file system from the output path itself, so a path on a file system
    // other than the configured default (e.g. an explicit hdfs:// or file:// URI) is
    // checked and deleted on the right one.
    FileSystem fs = outputPath.getFileSystem(conf);
    if (fs.exists(outputPath)) {
        fs.delete(outputPath, true);
    }

    // 6. Submit the job and wait for it to finish.
    boolean result = job.waitForCompletion(true);
    System.out.println(result ? 1 : 0);

    // Let scripts and schedulers see a failed job through the process exit status.
    if (!result) {
        System.exit(1);
    }
}
最终结果: