yarn配置标签调度
实现的目标(测试使用的是只有2个NodeManager节点):新建两个标签normal、highmem,配置两个队列dev、prd。其中dev保证可使用集群50%的资源且最多只能使用50%(maximum-capacity=50);prd保证可使用集群50%的资源,在集群空闲时最多可以使用到100%(maximum-capacity=100)
按照以下修改完配置,重启yarn,再新建标签、给机器添加标签
新建标签
yarn rmadmin -addToClusterNodeLabels "normal,highmem"
给机器添加标签
yarn rmadmin -replaceLabelsOnNode "主机名01:45454=normal 主机名02:45454=highmem"
配置可参考如下
一、在yarn-site.xml中新增以下配置
<property>
<name>yarn.nodemanager.address</name>
<value>0.0.0.0:45454</value>
</property>
<property>
<name>yarn.node-labels.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.node-labels.fs-store.root-dir</name>
<value>/user/node-label</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
</property>
<property>
<name>yarn.node-labels.manager-class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.nodelabels.RMNodeLabelsManager</value>
</property>
二、修改capacity-scheduler.xml
<configuration>
<property>
<name>yarn.scheduler.capacity.maximum-applications</name>
<value>10000</value>
<description>
Maximum number of applications that can be pending and running.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.maximum-am-resource-percent</name>
<value>0.8</value>
<description>
Maximum percent of resources in the cluster which can be used to run
application masters i.e. controls number of concurrent running
applications.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.resource-calculator</name>
<value>org.apache.hadoop.yarn.util.resource.DominantResourceCalculator</value>
<description>
The ResourceCalculator implementation to be used to compare
Resources in the scheduler.
The default i.e. DefaultResourceCalculator only uses Memory while
DominantResourceCalculator uses dominant-resource to compare
multi-dimensional resources such as Memory, CPU etc.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.root.queues</name>
<value>dev,prd</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.dev.capacity</name>
<value>50</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.prd.capacity</name>
<value>50</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.dev.maximum-capacity</name>
<value>50</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.prd.maximum-capacity</name>
<value>100</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.accessible-node-labels</name>
<value>*</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.dev.accessible-node-labels</name>
<value>normal</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.prd.accessible-node-labels</name>
<value>highmem</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.accessible-node-labels.normal.capacity</name>
<value>50</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.accessible-node-labels.highmem.capacity</name>
<value>50</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.dev.accessible-node-labels.normal.capacity</name>
<value>100</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.prd.accessible-node-labels.highmem.capacity</name>
<value>100</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.dev.default-node-label-expression</name>
<value>normal</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.prd.default-node-label-expression</name>
<value>highmem</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.dev.state</name>
<value>RUNNING</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.prd.state</name>
<value>RUNNING</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.dev.acl_submit_applications</name>
<value>*</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.prd.acl_submit_applications</name>
<value>*</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.dev.acl_administer_queue</name>
<value>*</value>
</property>
<property>
<name>yarn.scheduler.capacity.root.prd.acl_administer_queue</name>
<value>*</value>
</property>
<property>
<name>yarn.scheduler.capacity.node-locality-delay</name>
<value>2</value>
<description>
Number of missed scheduling opportunities after which the CapacityScheduler
attempts to schedule rack-local containers.
Typically this should be set to number of nodes in the cluster, By default is setting
approximately number of nodes in one rack which is 40.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.queue-mappings</name>
<value></value>
<description>
A list of mappings that will be used to assign jobs to queues
The syntax for this list is [u|g]:[name]:[queue_name][,next mapping]*
Typically this list will be used to map users to queues,
for example, u:%user:%user maps all users to queues with the same name
as the user.
</description>
</property>
<property>
<name>yarn.scheduler.capacity.queue-mappings-override.enable</name>
<value>false</value>
<description>
If a queue mapping is present, will it override the value specified
by the user? This can be used by administrators to place jobs in queues
that are different than the one specified by the user.
The default is false.
</description>
</property>
</configuration>