Note: this walkthrough uses a two-table join as the example.
Prerequisites:
1. Import the required jar packages into Eclipse:
   - all jars under hadoop-x-x-x/share/hadoop/mapreduce
   - hadoop-hdfs-2.7.1.jar under hadoop-x-x-x/share/hadoop/hdfs/
   - all jars under hadoop-x-x-x/share/hadoop/yarn
   - hadoop-common-2.7.1.jar under hadoop-x-x-x/share/hadoop/common
   - all packages under hadoop-x-x-x/share/hadoop/common/lib
   - all jars under hadoop-x-x-x/share/hadoop/tools
2. Start HDFS.
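For example, with Hadoop installed under /usr/local/hadoop (the path used later in this walkthrough; adjust to your installation), HDFS can be started with:

```bash
cd /usr/local/hadoop
./sbin/start-dfs.sh   # starts the NameNode, DataNode(s), and SecondaryNameNode
jps                   # confirm the HDFS daemons are running
```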
Order table:

uid | orderid
---|---
1 | 101
1 | 102
2 | 103
User table:

uid | name
---|---
1 | Lily
2 | Tom
Detailed steps:
1. First, create the two tables locally as CSV-format text files, then upload them to HDFS:
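A minimal sketch, assuming the files are named order.csv and user.csv and the HDFS input directory is /user/hadoop/input (all of these names are illustrative):

```bash
# Write the two tables as comma-separated files
printf "1,101\n1,102\n2,103\n" > order.csv
printf "1,Lily\n2,Tom\n"       > user.csv

# Upload them to HDFS (directory name is an assumption)
hdfs dfs -mkdir -p /user/hadoop/input
hdfs dfs -put order.csv user.csv /user/hadoop/input
```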
2. Set up the environment variables:
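One common way to do this (a sketch; your paths may differ) is to point HADOOP_HOME at the installation and let `hadoop classpath` assemble the compile-time classpath:

```bash
# Illustrative values; adjust to your installation
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export CLASSPATH=$($HADOOP_HOME/bin/hadoop classpath)
```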
3. Copy the source code into the /usr/local/hadoop directory and compile it:
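For example, assuming the source file is named Reducejoin.java and CLASSPATH was exported as in step 2:

```bash
cd /usr/local/hadoop
javac Reducejoin.java   # relies on the CLASSPATH exported in step 2
```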
4. Check the three .class files produced by compilation, and package them into a jar:
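Compilation yields Reducejoin.class plus one class file per nested class. A sketch of packaging them (the jar name is an assumption):

```bash
ls Reducejoin*.class
# Reducejoin.class  Reducejoin$MapClass.class  Reducejoin$Reduce.class
jar -cvf Reducejoin.jar Reducejoin*.class
```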
5. Launch the MapReduce job:
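A sketch, reusing the assumed input directory from step 1 (the output directory must not already exist):

```bash
hadoop jar Reducejoin.jar Reducejoin /user/hadoop/input /user/hadoop/output
```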
The job completes:
6. View the job output:
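A sketch of inspecting the result. The lines shown are what the sample data implies; the field order within each line is not guaranteed, since reducer input values arrive in no particular order:

```bash
hdfs dfs -cat /user/hadoop/output/part-r-00000
# 1    101,102,Lily
# 2    103,Tom
```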
Done!
Full code:

```java
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class Reducejoin {

    public static class MapClass extends Mapper<LongWritable, Text, Text, Text> {
        // Reuse the output objects across map() calls to avoid
        // allocating new ones for every input record
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // value is one line of input; split on the first comma so the
            // uid becomes the join key and the remainder becomes the value
            String[] keyValue = value.toString().split(",", 2);
            outKey.set(keyValue[0]);
            outValue.set(keyValue[1]);
            context.write(outKey, outValue);
        }
    }
    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        private final Text outValue = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // All records sharing a uid -- from both input files -- arrive here
            // together, so concatenating the values performs the join. Without
            // per-table tags, the order of the joined fields is not guaranteed.
            StringBuilder valueStr = new StringBuilder();
            for (Text val : values) {
                valueStr.append(val);
                valueStr.append(",");
            }
            // Drop the trailing comma
            outValue.set(valueStr.deleteCharAt(valueStr.length() - 1).toString());
            context.write(key, outValue);
        }
    }
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Job.getInstance replaces the constructor deprecated since Hadoop 2.x
        Job job = Job.getInstance(conf, "Join");
        job.setJarByClass(Reducejoin.class); // was Reducer.class, which points at the wrong class
        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);
        //job.setCombinerClass(Reduce.class); // optional
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
```