彷徨 | MapReduce客户端各种提交方式参数的配置(windows->local,windows->yarn,Linux->local,Linux->yarn)

最新推荐文章于 2024-01-10 20:29:55 发布

俊杰梓

最新推荐文章于 2024-01-10 20:29:55 发布

阅读量658

点赞数

分类专栏：大数据文章标签： MapReduce提交 MapReduce Local Yarn

本文链接：https://blog.csdn.net/weixin_35353187/article/details/81987123

版权

大数据专栏收录该内容

66 篇文章 19 订阅

订阅专栏

MR程序的两种运行模式

分布式运行模式：必须在yarn平台上

核心特点：

整个运行流程由MRAppMaster控制；

每一个task（maptask，reducetask）以及MRAppMaster，都是以独立的进程在nodemanager所提供的容器中执行；

本地运行模式：在本地以单进程多线程方式运行；

核心特点：

整个运行流程由LocalJobRunner控制，每一个task（maptask、reducetask）都以线程方式执行

决定mr程序是以分布式模式还是以本地模式运行的关键点：

jobclient端的参数 mapreduce.framework.name：取值为 local（本地模式）或 yarn（分布式模式）；

参数可以在jobclient的代码中设置conf.set()；

也可以在jobclient所运行的机器的hadoop配置文件(mapred-site.xml)中配置；

注意：如果以分布式模式运行mr，则你所访问的文件系统一定要是HDFS!!!（参数： fs.defaultFS = hdfs://....）

如果以本地模式运行mr，则你所访问的文件系统可以是HDFS，也可以是本地文件系统；

由此可见 , 提交方式自然就有四种

1.Windows提交到local

2.Windows提交到yarn

3.Linux提交到local

4.Linux提交到yarn

下面分别为四种提交方式的客户端配置参数

1.Windows提交到local

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Word-count job client from the article's "Windows -> local" section.
 *
 * <p>The client is independent of the map/reduce logic itself: it only describes the job
 * (jar, mapper, reducer, combiner, key/value types, paths) and hands it over for execution,
 * much like a carrier rocket delivering a satellite.
 *
 * <p>No {@code mapreduce.framework.name} or {@code fs.defaultFS} override is made here, so the
 * destination is whatever the Hadoop configuration found on the classpath resolves to
 * (presumably the local runner when nothing overrides it; confirm against your own
 * {@code mapred-site.xml}).
 *
 * @author hunter.d
 * @create_time 2018-04-11
 * @copyright www.edu360.cn
 */
public class JobClient_windows_local {
	/** Directory the raw input files are read from. */
	private static final String INPUT_DIR = "d:/wordcount/input/";
	/** Directory the result files are written to. */
	private static final String OUTPUT_DIR = "d:/wordcount/output2/";
	/** Number of reduce task instances requested (the original note states the default is 1). */
	private static final int REDUCE_TASKS = 2;

	/**
	 * Builds the word-count job, runs it and exits with 0 on success or 1 on failure.
	 *
	 * @param args command-line arguments; not used
	 * @throws Exception if building or running the job fails
	 */
	public static void main(String[] args) throws Exception {
		// A bare Configuration loads the Hadoop config files available on the classpath.
		Job wordcount = Job.getInstance(new Configuration());

		// Locate the job jar through this class (alternative: wordcount.setJar("d:/wc.jar")).
		wordcount.setJarByClass(JobClient_windows_local.class);

		registerLogic(wordcount);
		registerPaths(wordcount);
		wordcount.setNumReduceTasks(REDUCE_TASKS);

		// Blocking call; wordcount.submit() would be the non-blocking alternative.
		int exitCode = wordcount.waitForCompletion(true) ? 0 : 1;
		System.exit(exitCode);
	}

	/** Registers the mapper, reducer and combiner classes plus the key/value types they emit. */
	private static void registerLogic(Job job) {
		job.setMapperClass(WordcountMapper.class);
		job.setReducerClass(WordcountReducer.class);
		// Map-side partial aggregation reuses the reducer
		// (a dedicated WordcountCombiner class was the alternative).
		job.setCombinerClass(WordcountReducer.class);

		// KEY/VALUE types produced by the map phase.
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);

		// KEY/VALUE types produced by the reduce phase.
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
	}

	/** Points the job at its input directory and its output directory. */
	private static void registerPaths(Job job) throws Exception {
		Path source = new Path(INPUT_DIR);
		Path target = new Path(OUTPUT_DIR);
		FileInputFormat.setInputPaths(job, source);
		FileOutputFormat.setOutputPath(job, target);
	}

}

2.Windows提交到yarn

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Word-count job client that submits from a Windows machine to a YARN cluster.
 *
 * <p>The client is independent of the map/reduce logic itself: it only describes the job
 * (jar, mapper, reducer, key/value types, paths) and hands it to YARN for execution,
 * much like a carrier rocket delivering a satellite.
 *
 * <p>All cluster coordinates are set in code: framework {@code yarn}, default file system
 * {@code hdfs://cts01:9000}, resource manager host {@code cts01}, and cross-platform
 * submission enabled.
 *
 * @author hunter.d
 * @create_time 2018-04-11
 * @copyright www.edu360.cn
 */
public class JobClient_windows_yarn {
	/** Pre-built jar containing the mapper and reducer classes. */
	private static final String JOB_JAR = "d:/wc.jar";
	/** Directory the raw input files are read from. */
	private static final String INPUT_DIR = "/wordcount/input/";
	/** Directory the result files are written to. */
	private static final String OUTPUT_DIR = "/wordcount/output2/";
	/** Number of reduce task instances requested (the original note states the default is 1). */
	private static final int REDUCE_TASKS = 2;

	/**
	 * Builds the word-count job, submits it and exits with 0 on success or 1 on failure.
	 *
	 * @param args command-line arguments; not used
	 * @throws Exception if building or running the job fails
	 */
	public static void main(String[] args) throws Exception {
		// Set before any Hadoop object is created, exactly as in the original flow.
		System.setProperty("HADOOP_USER_NAME", "root");

		// The overrides must be in place before the Job is created from the configuration.
		Job wordcount = Job.getInstance(clusterConfiguration());

		// Ship an explicit jar (alternative: wordcount.setJarByClass(JobClient.class)).
		wordcount.setJar(JOB_JAR);

		registerLogic(wordcount);
		registerPaths(wordcount);
		wordcount.setNumReduceTasks(REDUCE_TASKS);

		// Blocking call; wordcount.submit() would be the non-blocking alternative.
		int exitCode = wordcount.waitForCompletion(true) ? 0 : 1;
		System.exit(exitCode);
	}

	/** Loads the classpath configuration and layers the cluster-specific overrides on top. */
	private static Configuration clusterConfiguration() {
		Configuration settings = new Configuration();
		// Normally found in mapred-site.xml.
		settings.set("mapreduce.framework.name", "yarn");
		// Normally found in core-site.xml.
		settings.set("fs.defaultFS", "hdfs://cts01:9000");
		settings.set("yarn.resourcemanager.hostname", "cts01");
		// Cross-platform submission: a Windows client submitting to Linux servers (YARN).
		settings.set("mapreduce.app-submission.cross-platform", "true");
		return settings;
	}

	/** Registers the mapper and reducer classes plus the key/value types they emit. */
	private static void registerLogic(Job job) {
		job.setMapperClass(WordcountMapper.class);
		job.setReducerClass(WordcountReducer.class);

		// KEY/VALUE types produced by the map phase.
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);

		// KEY/VALUE types produced by the reduce phase.
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
	}

	/** Points the job at its input directory and its output directory. */
	private static void registerPaths(Job job) throws Exception {
		Path source = new Path(INPUT_DIR);
		Path target = new Path(OUTPUT_DIR);
		FileInputFormat.setInputPaths(job, source);
		FileOutputFormat.setOutputPath(job, target);
	}

}

3.Linux提交到local

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Word-count job client from the article's "Linux -> local" section.
 *
 * <p>The client is independent of the map/reduce logic itself: it only describes the job
 * (jar, mapper, reducer, key/value types, paths) and hands it over for execution,
 * much like a carrier rocket delivering a satellite.
 *
 * <p>No {@code mapreduce.framework.name} or {@code fs.defaultFS} override is made here, so the
 * destination is whatever the Hadoop configuration found on the classpath resolves to
 * (presumably the local runner when nothing overrides it; confirm against your own
 * {@code mapred-site.xml}).
 *
 * @author hunter.d
 * @create_time 2018-04-11
 * @copyright www.edu360.cn
 */
public class JobClient_linux_local {
	/** Directory the raw input files are read from. */
	private static final String INPUT_DIR = "/root/wordcount/input/";
	/** Directory the result files are written to. */
	private static final String OUTPUT_DIR = "/root/wordcount/output2/";
	/** Number of reduce task instances requested (the original note states the default is 1). */
	private static final int REDUCE_TASKS = 2;

	/**
	 * Builds the word-count job, runs it and exits with 0 on success or 1 on failure.
	 *
	 * @param args command-line arguments; not used
	 * @throws Exception if building or running the job fails
	 */
	public static void main(String[] args) throws Exception {
		// A bare Configuration loads the Hadoop config files available on the classpath.
		Job wordcount = Job.getInstance(new Configuration());

		// Locate the job jar through this class (alternative: wordcount.setJar("d:/wc.jar")).
		wordcount.setJarByClass(JobClient_linux_local.class);

		registerLogic(wordcount);
		registerPaths(wordcount);
		wordcount.setNumReduceTasks(REDUCE_TASKS);

		// Blocking call; wordcount.submit() would be the non-blocking alternative.
		int exitCode = wordcount.waitForCompletion(true) ? 0 : 1;
		System.exit(exitCode);
	}

	/** Registers the mapper and reducer classes plus the key/value types they emit. */
	private static void registerLogic(Job job) {
		job.setMapperClass(WordcountMapper.class);
		job.setReducerClass(WordcountReducer.class);

		// KEY/VALUE types produced by the map phase.
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);

		// KEY/VALUE types produced by the reduce phase.
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
	}

	/** Points the job at its input directory and its output directory. */
	private static void registerPaths(Job job) throws Exception {
		Path source = new Path(INPUT_DIR);
		Path target = new Path(OUTPUT_DIR);
		FileInputFormat.setInputPaths(job, source);
		FileOutputFormat.setOutputPath(job, target);
	}

}

4.Linux提交到yarn

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IntWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;

/**
 * Word-count job client from the article's "Linux -> yarn" section.
 *
 * <p>The client is independent of the map/reduce logic itself: it only describes the job
 * (jar, mapper, reducer, key/value types, paths) and hands it to YARN for execution,
 * much like a carrier rocket delivering a satellite.
 *
 * <p>Only the framework name ({@code yarn}) and the default file system
 * ({@code hdfs://cts01:9000}) are set in code; every other setting comes from the Hadoop
 * configuration files found on the classpath.
 *
 * @author hunter.d
 * @create_time 2018-04-11
 * @copyright www.edu360.cn
 */
public class JobClient_linux_yarn {
	/** Directory the raw input files are read from. */
	private static final String INPUT_DIR = "/wordcount/input/";
	/** Directory the result files are written to. */
	private static final String OUTPUT_DIR = "/wordcount/output2/";
	/** Number of reduce task instances requested (the original note states the default is 1). */
	private static final int REDUCE_TASKS = 2;

	/**
	 * Builds the word-count job, submits it and exits with 0 on success or 1 on failure.
	 *
	 * @param args command-line arguments; not used
	 * @throws Exception if building or running the job fails
	 */
	public static void main(String[] args) throws Exception {
		// The overrides must be in place before the Job is created from the configuration.
		Job wordcount = Job.getInstance(clusterConfiguration());

		// Locate the job jar through this class (alternative: wordcount.setJar("d:/wc.jar")).
		wordcount.setJarByClass(JobClient_linux_yarn.class);

		registerLogic(wordcount);
		registerPaths(wordcount);
		wordcount.setNumReduceTasks(REDUCE_TASKS);

		// Blocking call; wordcount.submit() would be the non-blocking alternative.
		int exitCode = wordcount.waitForCompletion(true) ? 0 : 1;
		System.exit(exitCode);
	}

	/** Loads the classpath configuration and layers the cluster-specific overrides on top. */
	private static Configuration clusterConfiguration() {
		Configuration settings = new Configuration();
		// Normally found in mapred-site.xml.
		settings.set("mapreduce.framework.name", "yarn");
		// Normally found in core-site.xml.
		settings.set("fs.defaultFS", "hdfs://cts01:9000");
		return settings;
	}

	/** Registers the mapper and reducer classes plus the key/value types they emit. */
	private static void registerLogic(Job job) {
		job.setMapperClass(WordcountMapper.class);
		job.setReducerClass(WordcountReducer.class);

		// KEY/VALUE types produced by the map phase.
		job.setMapOutputKeyClass(Text.class);
		job.setMapOutputValueClass(IntWritable.class);

		// KEY/VALUE types produced by the reduce phase.
		job.setOutputKeyClass(Text.class);
		job.setOutputValueClass(IntWritable.class);
	}

	/** Points the job at its input directory and its output directory. */
	private static void registerPaths(Job job) throws Exception {
		Path source = new Path(INPUT_DIR);
		Path target = new Path(OUTPUT_DIR);
		FileInputFormat.setInputPaths(job, source);
		FileOutputFormat.setOutputPath(job, target);
	}

}

maptask和reducetask所申请的资源配额：

mapreduce.map.memory.mb	1024	一个maptask所需要的分配内存
mapreduce.map.cpu.vcores	1	一个maptask所需要的vcore
mapreduce.reduce.memory.mb	1024	一个reducetask所需要的内存
mapreduce.reduce.cpu.vcores	1	一个reducetask所需要的vcore

MRAppMaster所申请的资源配额：

yarn.app.mapreduce.am.resource.mb	1536	默认1.5G
yarn.app.mapreduce.am.resource.cpu-vcores	1	默认1核

俊杰梓

关注

0
点赞
踩
1

收藏

觉得还不错? 一键收藏
0
评论
彷徨 | MapReduce客户端各种提交方式参数的配置(windows->local,windows->yarn,Linux->local,Linux->yarn)

MR程序的两种运行模式分布式运行模式：必须在yarn平台上核心特点：整个运行流程由MRAppMaster控制；每一个task（maptask，reducetask）以及MRAppMaster，都是以独立的进程在nodemanager所提供的容器中执行；本地运行模式：在本地以单进程多线程方式运行；核心特点：整个运行流程由LocalJobRunner控制，每一个task（m...
复制链接

扫一扫