mapreduce的使用

千槿°

已于 2022-05-12 15:43:01 修改

阅读量371

点赞数

文章标签： hadoop mapreduce hdfs

于 2021-10-23 10:24:11 首次发布

本文链接：https://blog.csdn.net/qq_52401562/article/details/120860647

版权

本文介绍了如何在Hadoop环境下配置MapReduce，包括修改mapred-site.xml配置文件，并通过一个简单的WordCount例子讲解MapReduce代码的编写过程，强调了避免导入错误包的重要性，以及运行结果需要注意的事项。

摘要由CSDN通过智能技术生成

配置文件

这一步的配置比较简单：在/home/had/hadoop/etc/hadoop目录下，先将mapred-site.xml.template复制（或重命名）为mapred-site.xml（Hadoop只会读取mapred-site.xml，不会读取模板文件），然后在mapred-site.xml中增加如下内容即可。
在这里插入图片描述

注意：如果你的hadoop安装的不是这个目录，需要修改相应的值。
配置完之后不需要重启。

编写MapReduce代码

在编写MapReduce代码前需要先启动集群

这里我用hdfs命令上传了testfile文件，文件内容如下

Hello world
i love hadoop
hadoop hello

直接到eclipse新建一个MapReduce项目，创建包，创建类（如果不知道怎么创建，之前博客里有写）

下面以Wordcount为例 （即统计文件中各单词的数量）
在这里插入图片描述

代码思想：创建Configuration对象，创建Job实例，并设置作业的主类（wordCount）。先由Mapper处理输入文件并输出中间结果，再交给Reducer进行汇总，最后把结果输出到一个新的文件。

package mapreduce;

import java.io.IOException;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;

import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

public class wordCount {
	public static class MyMapper extends Mapper<Object, Text, Text, IntWritable> {
		protected void map (Object keymap,Text value,Context context) throws IOException, InterruptedException {
			String str = value.toString();//文件内容转为字符串
			String [] strArr =str.split(" ");//对字符串按空格进行分割
			
			for(String str1:strArr){//遍历所有字符串
				context.write(new Text(str1), new IntWritable(1));
			}
		}
	}
	
    public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
		protected void reduce (Text redKey,Iterable<IntWritable> redVals,Context context) throws IOException, InterruptedException {
			int sum=0;
			for(IntWritable redVal:redVals){
				sum = sum + redVal.get();//统计各个字符串的数量
			}
			context.write(redKey, new IntWritable(sum));//把结果写入文件
		}
	}
	public static void main(String[] args) throws IllegalArgumentException, IOException {
		// TODO Auto-generated method stub
        Configuration conf = new Configuration();//创建configuration对象
        Job job =Job.getInstance(conf);//创建job实例
        job.setJarByClass(wordCount.class);
        
        job.setMapperClass(MyMapper.class);
        job.setReducerClass(MyReducer.class);
        
        job.setMapOutputKeyClass(Text.class);
        job.setMapOutputValueClass(IntWritable.class);
        
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(IntWritable.class);
        
        FileInputFormat.setInputPaths(job, new Path("hdfs://hadoop1:8020/data/testfile"));//定义输入文件路径，可以自行更改
        FileOutputFormat.setOutputPath(job, new Path("hdfs://hadoop1:8020/data/output1"));//定义输出文件路径，可以自行更改
        
        try {
			boolean res =job.waitForCompletion(true);
			System.exit(res?0:1);
		} catch (ClassNotFoundException e) {
			// TODO: handle exception
			e.printStackTrace();
		} catch (InterruptedException e){
			e.printStackTrace();
		} 
	}
}