MapReduce Run Modes
1. Local mode vs. cluster mode
// Set to "local" to run in local mode
config.set("mapreduce.framework.name", "local");
// Set to "yarn" to run in cluster mode
config.set("mapreduce.framework.name", "yarn");
2. Input and output paths for data files
// Read and write HDFS paths
config.set("fs.defaultFS", "hdfs://node01:9000");
// Read and write local paths
config.set("fs.defaultFS", "file:///");
Note: in cluster mode, the file paths must be HDFS paths.
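A path without a scheme is resolved against fs.defaultFS, while a path with an explicit scheme always overrides it; a quick illustration (the concrete paths are placeholders):

// With fs.defaultFS = hdfs://node01:9000:
Path p1 = new Path("/input");                    // resolves to hdfs://node01:9000/input
Path p2 = new Path("file:///F:/input");          // local file system, regardless of fs.defaultFS
Path p3 = new Path("hdfs://node01:9000/output"); // HDFS, regardless of fs.defaultFS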
3. When the output path is on HDFS, the job may fail with AccessControlException: Permission denied; the error message looks like this:
Caused by: org.apache.hadoop.ipc.RemoteException: org.apache.hadoop.security.AccessControlException: Permission denied: user=node01, access=WRITE, inode="":suh:supergroup:rwxr-xr-x
Solutions:
1. Add HADOOP_USER_NAME to the system environment variables or as a JVM property, with its value set to the Linux user that Hadoop runs as on the cluster. After the change, restart Eclipse (the recommended approach).
2. Add the following setting in the MapReduce driver program:
// The user named by HADOOP_USER_NAME must have read/write permission on HDFS;
// note that this must be set as a JVM system property, not a Configuration entry
System.setProperty("HADOOP_USER_NAME", "root");
Complete WordCount code
package com.theone.pureone.mymapreducer;
import java.io.IOException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.Mapper;
import org.apache.hadoop.mapreduce.Reducer;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
public class WordCountMR {
/**
 * LongWritable: the byte offset of the current line within the input file (not a line number)
 * Text: the text of the current line
 * Text: the type of the key output by the Mapper
 * IntWritable: the type of the value output by the Mapper
 * @author Pureone
 */
public static class MyMapper extends Mapper<LongWritable, Text, Text, IntWritable> {
@Override
protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
    // Split the line on runs of whitespace and emit (word, 1) for each token
    String[] split = value.toString().split("\\s+");
    for (String str : split) {
        // Reuse the input Text as the output key; write() serializes it immediately
        value.set(str);
        context.write(value, new IntWritable(1));
    }
}
}
/**
 * Text: the type of the key received from the Mapper
 * IntWritable: the type of the value received from the Mapper
 * Text: the type of the key output by the Reducer
 * IntWritable: the type of the value output by the Reducer
 * @author Pureone
 */
public static class MyReducer extends Reducer<Text, IntWritable, Text, IntWritable> {
@Override
protected void reduce(Text key, Iterable<IntWritable> values, Context context)
        throws IOException, InterruptedException {
    // Sum the per-word counts emitted by the Mappers
    int count = 0;
    for (IntWritable next : values) {
        count += next.get();
    }
    context.write(key, new IntWritable(count));
}
}
/**
 * Driver program
 *
 * @throws IOException
 * @throws InterruptedException
 * @throws ClassNotFoundException
 */
public static void main(String[] args) throws IOException, ClassNotFoundException, InterruptedException {
Configuration config = new Configuration();
// Use HDFS as the default file system
config.set("fs.defaultFS", "hdfs://node01:9000");
// Run as a user that has write permission on HDFS (see section 3 above)
System.setProperty("HADOOP_USER_NAME", "root");
// Run in local mode; set to "yarn" for cluster mode
config.set("mapreduce.framework.name", "local");
// config.set("yarn.resourcemanager.hostname", "node01"); // required in cluster mode
// Pass the Configuration to the job; otherwise the settings above are silently ignored
Job job = Job.getInstance(config);
// Set the driver class so the framework can locate the job jar
job.setJarByClass(WordCountMR.class);
// Set the key/value types of the Mapper output
job.setMapOutputKeyClass(Text.class);
job.setMapOutputValueClass(IntWritable.class);
// Set the key/value types of the Reducer output
job.setOutputKeyClass(Text.class);
job.setOutputValueClass(IntWritable.class);
// Set the Mapper class
job.setMapperClass(MyMapper.class);
// Set the Reducer class
job.setReducerClass(MyReducer.class);
// Input and output paths for this MapReduce job
// The explicit scheme overrides fs.defaultFS: input is read from the local
// disk (only valid in local mode), output is written to HDFS
Path inputPath = new Path("file:///F:/input");
Path outputPath = new Path("hdfs://node01:9000/first_path/output");
FileSystem fileSystem = FileSystem.get(config);
// Delete the output path if it already exists; otherwise the job fails on startup
if (fileSystem.exists(outputPath)) {
fileSystem.delete(outputPath, true);
}
// Bind the input and output paths to the job
FileInputFormat.setInputPaths(job, inputPath);
FileOutputFormat.setOutputPath(job, outputPath);
// Submit the job and wait for completion; exit 0 on success
boolean completion = job.waitForCompletion(true);
System.exit(completion ? 0 : 1);
}
}
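For reference, given an input file under F:\input containing:

hello world
hello hadoop

the job writes a part-r-00000 file under /first_path/output with tab-separated counts, sorted by key:

hadoop	1
hello	2
world	1

To run the same class in cluster mode, set mapreduce.framework.name to yarn, switch the input to an HDFS path, package the program as a jar, and submit it with the hadoop jar command (the jar name here is a placeholder):

hadoop jar wordcount.jar com.theone.pureone.mymapreducer.WordCountMR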