Note: this walkthrough uses a two-table join as the example.
Prerequisites:
1. Import the required jar packages into Eclipse:
   - all jars under hadoop-x-x-x/share/hadoop/mapreduce
   - hadoop-hdfs-2.7.1.jar under hadoop-x-x-x/share/hadoop/hdfs/
   - all jars under hadoop-x-x-x/share/hadoop/yarn
   - hadoop-common-2.7.1.jar under hadoop-x-x-x/share/hadoop/common
   - all packages under hadoop-x-x-x/share/hadoop/common/lib
   - all jars under hadoop-x-x-x/share/hadoop/tools
2. Start HDFS.
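For example, with Hadoop installed under /usr/local/hadoop (the path used later in this walkthrough; adjust to your installation), HDFS can be started with:

```bash
cd /usr/local/hadoop
./sbin/start-dfs.sh   # starts the NameNode, DataNode(s), and SecondaryNameNode
jps                   # confirm the HDFS daemons are running
```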
Order table:

uid | orderid
---|---
1 | 101
1 | 102
2 | 103
User table:

uid | name
---|---
1 | Lily
2 | Tom
Detailed steps:
1. First, create the two tables locally as CSV-format text files, then upload them to HDFS:
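A minimal sketch, assuming the files are named order.csv and user.csv and the HDFS input directory is /user/hadoop/input (all of these names are illustrative):

```bash
# Write the two tables as comma-separated files
printf "1,101\n1,102\n2,103\n" > order.csv
printf "1,Lily\n2,Tom\n"       > user.csv

# Upload them to HDFS (directory name is an assumption)
hdfs dfs -mkdir -p /user/hadoop/input
hdfs dfs -put order.csv user.csv /user/hadoop/input
```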
2. Set up the environment variables:
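One common way to do this (a sketch; your paths may differ) is to point HADOOP_HOME at the installation and let `hadoop classpath` assemble the compile-time classpath:

```bash
# Illustrative values; adjust to your installation
export HADOOP_HOME=/usr/local/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
export CLASSPATH=$($HADOOP_HOME/bin/hadoop classpath)
```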
3. Copy the source code into the /usr/local/hadoop directory and compile it:
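For example, assuming the source file is named Reducejoin.java and CLASSPATH was exported as in step 2:

```bash
cd /usr/local/hadoop
javac Reducejoin.java   # relies on the CLASSPATH exported in step 2
```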
4. Check the three .class files produced by compilation, and package them into a jar:
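Compilation yields Reducejoin.class plus one class file per nested class. A sketch of packaging them (the jar name is an assumption):

```bash
ls Reducejoin*.class
# Reducejoin.class  Reducejoin$MapClass.class  Reducejoin$Reduce.class
jar -cvf Reducejoin.jar Reducejoin*.class
```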
5. Launch the MapReduce job:
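A sketch, reusing the assumed input directory from step 1 (the output directory must not already exist):

```bash
hadoop jar Reducejoin.jar Reducejoin /user/hadoop/input /user/hadoop/output
```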
The job completes:
6. View the job output:
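A sketch of inspecting the result. The lines shown are what the sample data implies; the field order within each line is not guaranteed, since reducer input values arrive in no particular order:

```bash
hdfs dfs -cat /user/hadoop/output/part-r-00000
# 1    101,102,Lily
# 2    103,Tom
```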
Done!
Full code:

```java
import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
public class Reducejoin {

    public static class MapClass extends Mapper<LongWritable, Text, Text, Text> {
        // Reuse the output objects across map() calls to avoid
        // allocating new ones for every input record
        private final Text outKey = new Text();
        private final Text outValue = new Text();

        @Override
        protected void map(LongWritable key, Text value, Context context)
                throws IOException, InterruptedException {
            // value is one line of input; split on the first comma so the
            // uid becomes the join key and the remainder becomes the value
            String[] keyValue = value.toString().split(",", 2);
            outKey.set(keyValue[0]);
            outValue.set(keyValue[1]);
            context.write(outKey, outValue);
        }
    }
    public static class Reduce extends Reducer<Text, Text, Text, Text> {
        private final Text outValue = new Text();

        @Override
        protected void reduce(Text key, Iterable<Text> values, Context context)
                throws IOException, InterruptedException {
            // All records sharing a uid -- from both input files -- arrive here
            // together, so concatenating the values performs the join. Without
            // per-table tags, the order of the joined fields is not guaranteed.
            StringBuilder valueStr = new StringBuilder();
            for (Text val : values) {
                valueStr.append(val);
                valueStr.append(",");
            }
            // Drop the trailing comma
            outValue.set(valueStr.deleteCharAt(valueStr.length() - 1).toString());
            context.write(key, outValue);
        }
    }
    public static void main(String[] args)
            throws IOException, ClassNotFoundException, InterruptedException {
        Configuration conf = new Configuration();
        // Job.getInstance replaces the constructor deprecated since Hadoop 2.x
        Job job = Job.getInstance(conf, "Join");
        job.setJarByClass(Reducejoin.class); // was Reducer.class, which points at the wrong class
        job.setMapperClass(MapClass.class);
        job.setReducerClass(Reduce.class);
        //job.setCombinerClass(Reduce.class); // optional
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(TextOutputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}
```