Hadoop Capacity Scheduler 完整配置

配置 Hadoop 的调度器和队列(目前还没用过,先记录下来,以备后用)

mapred-site.xml


<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>

<!-- Put site-specific property overrides in this file. -->

<configuration>
  <!-- Address (host:port) of the JobTracker. -->
  <!-- NOTE(review): 0.0.0.0 is a wildcard address; remote TaskTrackers and
       clients presumably need a resolvable hostname here. Confirm before
       using this on a multi-node cluster. -->
  <property>
    <name>mapred.job.tracker</name>
    <value>0.0.0.0:9003</value>
  </property>

  <!-- Selects the Capacity Scheduler as the JobTracker's task scheduler. -->
  <property>
    <name>mapred.jobtracker.taskScheduler</name>
    <value>org.apache.hadoop.mapred.CapacityTaskScheduler</value>
  </property>

  <!-- Comma-separated list of job queues. Each queue named here has a
       matching group of mapred.capacity-scheduler.queue.* properties in
       capacity-scheduler.xml. -->
  <!-- NOTE(review): no queue called "default" is declared; jobs submitted
       without an explicit queue name presumably target "default" and may be
       rejected. Confirm against the Hadoop version in use. -->
  <property>
    <name>mapred.queue.names</name>
    <value>queueA,queueB,queueC,queueD</value>
  </property>
</configuration>


--------------------------------------------
capacity-scheduler.xml

<?xml version="1.0"?>

<!-- This is the configuration file for the resource manager in Hadoop. -->
<!-- You can configure various scheduling parameters related to queues. -->
<!-- The properties for a queue follow a naming convention, such as -->
<!-- mapred.capacity-scheduler.queue.<queue-name>.property-name. -->

<configuration>
<!--
  <property>
    <name>mapred.capacity-scheduler.queue.default.capacity</name>
    <value>10</value>
    <description>Percentage of the number of slots in the cluster that are
      to be available for jobs in this queue.
    </description>    
  </property>
  
  <property>
    <name>mapred.capacity-scheduler.queue.default.maximum-capacity</name>
    <value>-1</value>
    <description>
maximum-capacity defines a limit beyond which a queue cannot use the capacity of the cluster.
This provides a means to limit how much excess capacity a queue can use. By default, there is no limit.
The maximum-capacity of a queue can only be greater than or equal to its minimum capacity.
        Default value of -1 implies a queue can use complete capacity of the cluster.

        This property can be used to keep long-running jobs from occupying more than a
        certain percentage of the cluster, which, in the absence of pre-emption, could otherwise
        affect the capacity guarantees of other queues.

        One important thing to note is that maximum-capacity is a percentage, so the absolute
        maximum capacity changes with the cluster's capacity. If a large number of nodes or racks
        is added to the cluster, the maximum capacity in absolute terms increases accordingly.
    </description>    
  </property>
  
  <property>
    <name>mapred.capacity-scheduler.queue.default.supports-priority</name>
    <value>false</value>
    <description>If true, priorities of jobs will be taken into 
      account in scheduling decisions.
    </description>
  </property>

  <property>
    <name>mapred.capacity-scheduler.queue.default.minimum-user-limit-percent</name>
    <value>100</value>
    <description> Each queue enforces a limit on the percentage of resources 
    allocated to a user at any given time, if there is competition for them. 
    This user limit can vary between a minimum and maximum value. The former
    depends on the number of users who have submitted jobs, and the latter is
    set to this property value. For example, suppose the value of this 
    property is 25. If two users have submitted jobs to a queue, no single 
    user can use more than 50% of the queue resources. If a third user submits
    a job, no single user can use more than 33% of the queue resources. With 4 
    or more users, no user can use more than 25% of the queue's resources. A 
    value of 100 implies no user limits are imposed. 
    </description>
  </property>
  <property>
    <name>mapred.capacity-scheduler.queue.default.maximum-initialized-jobs-per-user</name>
    <value>2</value>
    <description>The maximum number of jobs to be pre-initialized for a user
    of the job queue.
    </description>
  </property>
-->
  <!-- The default configuration settings for the capacity task scheduler -->
  <!-- The default values would be applied to all the queues which don't have -->
  <!-- the appropriate property for the particular queue -->
<!--
  <property>
    <name>mapred.capacity-scheduler.default-supports-priority</name>
    <value>false</value>
    <description>If true, priorities of jobs will be taken into 
      account in scheduling decisions by default in a job queue.
    </description>
  </property>
  
  <property>
    <name>mapred.capacity-scheduler.default-minimum-user-limit-percent</name>
    <value>100</value>
    <description>The percentage of the resources limited to a particular user
      for the job queue at any given point of time by default.
    </description>
  </property>

  <property>
    <name>mapred.capacity-scheduler.default-maximum-initialized-jobs-per-user</name>
    <value>2</value>
    <description>The maximum number of jobs to be pre-initialized for a user
    of the job queue.
    </description>
  </property>
-->

  <!-- Capacity scheduler Job Initialization configuration parameters -->
   <property>
    <name>mapred.capacity-scheduler.init-poll-interval</name>
    <value>5000</value>
    <description>The amount of time in miliseconds which is used to poll 
    the job queues for jobs to initialize.
    </description>
  </property>
  <property>
    <name>mapred.capacity-scheduler.init-worker-threads</name>
    <value>5</value>
    <description>Number of worker threads which would be used by
    Initialization poller to initialize jobs in a set of queue.
    If number mentioned in property is equal to number of job queues
    then a single thread would initialize jobs in a queue. If lesser
    then a thread would get a set of queues assigned. If the number
    is greater then number of threads would be equal to number of 
    job queues.
    </description>
  </property>

<property> 
  <name>mapred.capacity-scheduler.maximum-system-jobs</name> 
  <value>30</value> 
<description>Maximum number of jobs in the system which can be initialized, 
concurrently, by the Capacity Scheduler. 
</description> 
</property> 

<!-- queue: queueA -->
  <!-- Percentage of the cluster's slots made available to jobs in this queue. -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueA.capacity</name>
    <value>25</value>
  </property>

  <!-- false: job priorities are not taken into account when scheduling. -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueA.supports-priority</name>
    <value>false</value>
  </property>

  <!-- Per-user limit under contention: with five or more competing users,
       no single user can use more than 20% of the queue's resources. -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueA.minimum-user-limit-percent</name>
    <value>20</value>
  </property>

  <!-- Multiple of the queue capacity a single user may acquire (per the
       Hadoop 1.x Capacity Scheduler guide). queueA is the only queue in this
       file set to 10; the others use 1. -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueA.user-limit-factor</name>
    <value>10</value>
  </property>

  <!-- Cap on concurrently initialized tasks across all jobs in the queue
       (per the Hadoop 1.x Capacity Scheduler guide). -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueA.maximum-initialized-active-tasks</name>
    <value>200000</value>
  </property>

  <!-- The same cap, applied per user. -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueA.maximum-initialized-active-tasks-per-user</name>
    <value>100000</value>
  </property>

  <!-- Multiplier on (maximum-system-jobs * queue capacity) that bounds how
       many jobs the scheduler accepts for this queue (per the Hadoop 1.x
       guide). queueA uses 100 where the other queues in this file use 10. -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueA.init-accept-jobs-factor</name>
    <value>100</value>
  </property>

<!-- queue: queueB -->
  <!-- Percentage of the cluster's slots made available to jobs in this queue. -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueB.capacity</name>
    <value>25</value>
  </property>

  <!-- false: job priorities are not taken into account when scheduling. -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueB.supports-priority</name>
    <value>false</value>
  </property>

  <!-- Per-user limit under contention: with five or more competing users,
       no single user can use more than 20% of the queue's resources. -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueB.minimum-user-limit-percent</name>
    <value>20</value>
  </property>

  <!-- Multiple of the queue capacity a single user may acquire (per the
       Hadoop 1.x Capacity Scheduler guide). -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueB.user-limit-factor</name>
    <value>1</value>
  </property>

  <!-- Cap on concurrently initialized tasks across all jobs in the queue
       (per the Hadoop 1.x Capacity Scheduler guide). -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueB.maximum-initialized-active-tasks</name>
    <value>200000</value>
  </property>

  <!-- The same cap, applied per user. -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueB.maximum-initialized-active-tasks-per-user</name>
    <value>100000</value>
  </property>

  <!-- Multiplier on (maximum-system-jobs * queue capacity) that bounds how
       many jobs the scheduler accepts for this queue (per the Hadoop 1.x
       guide). -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueB.init-accept-jobs-factor</name>
    <value>10</value>
  </property>

<!-- queue: queueC -->
  <!-- Percentage of the cluster's slots made available to jobs in this queue. -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueC.capacity</name>
    <value>25</value>
  </property>

  <!-- false: job priorities are not taken into account when scheduling. -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueC.supports-priority</name>
    <value>false</value>
  </property>

  <!-- Per-user limit under contention: with five or more competing users,
       no single user can use more than 20% of the queue's resources. -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueC.minimum-user-limit-percent</name>
    <value>20</value>
  </property>

  <!-- Multiple of the queue capacity a single user may acquire (per the
       Hadoop 1.x Capacity Scheduler guide). -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueC.user-limit-factor</name>
    <value>1</value>
  </property>

  <!-- Cap on concurrently initialized tasks across all jobs in the queue
       (per the Hadoop 1.x Capacity Scheduler guide). -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueC.maximum-initialized-active-tasks</name>
    <value>200000</value>
  </property>

  <!-- The same cap, applied per user. -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueC.maximum-initialized-active-tasks-per-user</name>
    <value>100000</value>
  </property>

  <!-- Multiplier on (maximum-system-jobs * queue capacity) that bounds how
       many jobs the scheduler accepts for this queue (per the Hadoop 1.x
       guide). -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueC.init-accept-jobs-factor</name>
    <value>10</value>
  </property>

<!-- queue: queueD -->
  <!-- Percentage of the cluster's slots made available to jobs in this queue. -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueD.capacity</name>
    <value>25</value>
  </property>

  <!-- false: job priorities are not taken into account when scheduling. -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueD.supports-priority</name>
    <value>false</value>
  </property>

  <!-- Per-user limit under contention: with five or more competing users,
       no single user can use more than 20% of the queue's resources. -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueD.minimum-user-limit-percent</name>
    <value>20</value>
  </property>

  <!-- Multiple of the queue capacity a single user may acquire (per the
       Hadoop 1.x Capacity Scheduler guide). -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueD.user-limit-factor</name>
    <value>1</value>
  </property>

  <!-- Cap on concurrently initialized tasks across all jobs in the queue
       (per the Hadoop 1.x Capacity Scheduler guide). -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueD.maximum-initialized-active-tasks</name>
    <value>200000</value>
  </property>

  <!-- The same cap, applied per user. -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueD.maximum-initialized-active-tasks-per-user</name>
    <value>100000</value>
  </property>

  <!-- Multiplier on (maximum-system-jobs * queue capacity) that bounds how
       many jobs the scheduler accepts for this queue (per the Hadoop 1.x
       guide). -->
  <property>
    <name>mapred.capacity-scheduler.queue.queueD.init-accept-jobs-factor</name>
    <value>10</value>
  </property>
</configuration>

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值