package squencefile;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
// Single-table self-join: from <child, parent> pairs, derive <grandchild, grandparent> pairs
public class SingleJoin {
    public static class MyMapper extends Mapper<LongWritable, Text, Text, Text> {
        @Override
        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
            // Split the line into fields
            String line = value.toString();
            String[] lineArr = line.split(" ");
            // Skip the header line
            if (!"child".equals(lineArr[0])) {
                // Emit <child, parent> tagged "1" (upward, toward the parent generation);
                // the tag must come before the colon so the reducer's split(":") can read it
                context.write(new Text(lineArr[0]), new Text("1:" + lineArr[1]));
                // Emit <parent, child> tagged "2" (downward, toward the child generation)
                context.write(new Text(lineArr[1]), new Text("2:" + lineArr[0]));
            }
        }
    }
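    // Example: the line "Tom Lucy" makes the mapper emit
    //   <Tom,  "1:Lucy">  (Lucy is Tom's parent)
    //   <Lucy, "2:Tom">   (Tom is Lucy's child)
    // so at Lucy's reduce call her parents (tag 1) meet her children (tag 2).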
    public static class MyReducer extends Reducer<Text, Text, Text, Text> {
        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
            // Local lists, so entries cannot leak across keys (as instance fields
            // they would accumulate over every reduce() call in the task)
            List<String> grandChildList = new ArrayList<>();
            List<String> grandParentList = new ArrayList<>();
            for (Text tempVal : values) {
                String tempValStr = tempVal.toString();
                String[] tempArr = tempValStr.split(":");
                if ("2".equals(tempArr[0])) {
                    // Tag 2: a child of the key, i.e. a grandchild candidate
                    grandChildList.add(tempArr[1]);
                } else if ("1".equals(tempArr[0])) {
                    // Tag 1: a parent of the key, i.e. a grandparent candidate
                    grandParentList.add(tempArr[1]);
                }
            }
            // Cross-join the two lists to get the <grandchild, grandparent> relation
            for (String grandParent : grandParentList) {
                for (String grandChild : grandChildList) {
                    context.write(new Text(grandChild), new Text(grandParent));
                }
            }
        }
    }
    public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
        // Create the job configuration (the runtime environment)
        Configuration conf = new Configuration();
        // For cluster execution, point fs.defaultFS at the NameNode RPC address
        // (port 9000 or 8020 by convention; 8088 is the YARN web UI, not HDFS):
        // conf.set("fs.defaultFS", "hdfs://hadoop:9000");
        // Local execution:
        Job job = Job.getInstance(conf, "SingleJoin");
        // Entry point of the program (needed when running from a jar)
        job.setJarByClass(SingleJoin.class);
        // Input file
        FileInputFormat.addInputPath(job, new Path("F:\\filnk_package\\hadoop-2.10.1\\data\\test4\\file1.txt"));
        // Mapper logic
        job.setMapperClass(SingleJoin.MyMapper.class);
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(Text.class);
        // Shuffle runs between map and reduce
        // Reducer logic
        job.setReducerClass(SingleJoin.MyReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        // Output directory (must not exist before the job runs)
        FileOutputFormat.setOutputPath(job, new Path("F:\\filnk_package\\hadoop-2.10.1\\data\\test4\\out"));
        // Run the job (submitted to YARN when running on a cluster)
        boolean result = job.waitForCompletion(true);
        System.out.print(result ? 1 : 0);
    }
}
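To submit the job to a cluster instead of running it locally, package the class into a jar and launch it with the hadoop CLI. A minimal sketch, assuming the jar is named single-join.jar (a placeholder) and the hard-coded F:\ paths above have been swapped for HDFS paths:

hadoop jar single-join.jar squencefile.SingleJoin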
Contents of file1.txt:
child parent
Tom Lucy
Tom Jack
Jone Lucy
Jone Jack
Lucy Marry
Lucy Ben
Jack Jesse
Jack Alice
Terry Alice
Terry Jesse
Philip Terry
Philip Alma
Mark Terry
Mark Alma
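For reference, the twelve <grandchild, grandparent> pairs this input should produce (line order may vary, since it depends on how keys and values are grouped):

Tom Jesse
Jone Jesse
Tom Alice
Jone Alice
Tom Marry
Jone Marry
Tom Ben
Jone Ben
Philip Alice
Mark Alice
Philip Jesse
Mark Jesse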