Hadoop Production Configuration Files V2

Production-tuned configuration for Apache Hadoop 2.7.3 with NameNode HA and ResourceManager HA. This covers only the HDFS and YARN configuration files themselves; GC flags and other per-role tuning are out of scope. Offered for reference or direct use, though not necessarily optimal for every cluster.

core-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://flashHadoopDev</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/app/hadoop/tmp</value>
  </property>
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
  </property>
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>VECS02907:2181,VECS02908:2181,VECS02909:2181</value>
  </property>
  <property>
    <name>io.compression.codecs</name>
    <value>org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.Lz4Codec</value>
  </property>
  <property>
    <name>fs.trash.interval</name>
    <value>2880</value>
  </property>
  <!--
  <property>
    <name>net.topology.script.file.name</name>
    <value>/apps/hadoop-conf/rack.sh</value>
  </property>
  -->
  <!-- Timeout for the HealthMonitor's NameNode RPC check; default 45000 ms, raised to 5 minutes here -->
  <property>
    <name>ha.health-monitor.rpc-timeout.ms</name>
    <value>300000</value>
  </property>
  <!-- ZooKeeper session timeout for ZKFC failover; default 5000 ms, raised to 3 minutes here -->
  <property>
    <name>ha.zookeeper.session-timeout.ms</name>
    <value>180000</value>
  </property>


  <property>
    <name>hadoop.proxyuser.deploy.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.deploy.groups</name>
    <value>*</value>
  </property>
</configuration>
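
A quick sanity check after distributing this file, assuming the hadoop/hdfs CLIs are on the PATH and both NameNodes and ZKFCs are running (the /tmp/some-file path below is only a placeholder):

# Confirm the nameservice resolves and automatic failover is healthy
hdfs getconf -confKey fs.defaultFS    # expect hdfs://flashHadoopDev
hdfs haadmin -getServiceState nn1     # prints "active" or "standby"
hdfs haadmin -getServiceState nn2
# fs.trash.interval=2880 minutes keeps deleted files recoverable for 2 days
hadoop fs -rm /tmp/some-file          # placeholder path; moves to /user/<user>/.Trash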

hdfs-site.xml

<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <property>
    <name>dfs.nameservices</name>
    <value>flashHadoopDev</value>
  </property>

  <!-- flashHadoopDev -->
  <property>
    <name>dfs.ha.namenodes.flashHadoopDev</name>
    <value>nn1,nn2</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.flashHadoopDev.nn1</name>
    <value>VECS02907:8020</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.flashHadoopDev.nn2</name>
    <value>VECS02908:8020</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.flashHadoopDev.nn1</name>
    <value>VECS02907:50070</value>
  </property>
  <property>
    <name>dfs.namenode.http-address.flashHadoopDev.nn2</name>
    <value>VECS02908:50070</value>
  </property>
  <property>
    <name>dfs.client.failover.proxy.provider.flashHadoopDev</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>
  <property>
    <name>dfs.namenode.shared.edits.dir.flashHadoopDev</name>
    <value>qjournal://VECS02907:8485;VECS02908:8485;VECS02909:8485/flashHadoopDev</value>
  </property>

  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:///data1/data/flashHadoopDev/namenode/,file:///data2/data/flashHadoopDev/namenode/</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:///data1/HDATA/dfs/local,
           file:///data2/HDATA/dfs/local,
           file:///data3/HDATA/dfs/local,
           file:///data4/HDATA/dfs/local,
           file:///data5/HDATA/dfs/local,
           file:///data6/HDATA/dfs/local,
           file:///data7/HDATA/dfs/local,
           file:///data8/HDATA/dfs/local</value>
  </property>
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/data1/data/flashHadoopDev/journal</value>
  </property>
  <property>
    <name>dfs.qjournal.start-segment.timeout.ms</name>
    <value>60000</value>
  </property>
  <property>
    <name>dfs.qjournal.prepare-recovery.timeout.ms</name>
    <value>240000</value>
  </property>
  <property>
    <name>dfs.qjournal.accept-recovery.timeout.ms</name>
    <value>240000</value>
  </property>
  <property>
    <name>dfs.qjournal.finalize-segment.timeout.ms</name>
    <value>240000</value>
  </property>
  <property>
    <name>dfs.qjournal.select-input-streams.timeout.ms</name>
    <value>60000</value>
  </property>
  <property>
    <name>dfs.qjournal.get-journal-state.timeout.ms</name>
    <value>240000</value>
  </property>
  <property>
    <name>dfs.qjournal.new-epoch.timeout.ms</name>
    <value>240000</value>
  </property>
  <property>
    <name>dfs.qjournal.write-txns.timeout.ms</name>
    <value>60000</value>
  </property>
  <property>
    <name>dfs.namenode.acls.enabled</name>
    <value>true</value>
    <description>Enables POSIX-style ACL support on the NameNode.</description>
  </property>
  <!-- Adjust the fencing method and key path to your actual deployment -->
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/hdfs/.ssh/id_rsa</value>
  </property>
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.permissions.superusergroup</name>
    <value>hadoop</value>
  </property>
  <property>
    <name>dfs.datanode.max.transfer.threads</name>
    <value>16384</value>
  </property>
  <property>
    <name>dfs.hosts.exclude</name>
    <value>/app/hadoop/etc/hadoop/exclude.list</value>
    <description> List of nodes to decommission </description>
  </property>

  <property>
    <name>dfs.datanode.fsdataset.volume.choosing.policy</name>
    <value>org.apache.hadoop.hdfs.server.datanode.fsdataset.AvailableSpaceVolumeChoosingPolicy</value>
  </property>
  <property>
    <name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-threshold</name>
    <value>10737418240</value>
  </property>
  <property>
    <name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-preference-fraction</name>
    <value>0.75</value>
  </property>
  <!-- 2018-06-19 disk parameter change: reserve ~1.4 TB per volume -->
  <property>
    <name>dfs.datanode.du.reserved</name>
    <value>1503238553600</value>
    <description>Reserved space in bytes per volume. Always leave this much space free for non dfs use.</description>
  </property>
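  <!-- Sanity check on the reserved-space figure: 1400 GiB x 1,073,741,824 bytes/GiB
       = 1,503,238,553,600 bytes, i.e. the "~1.4 TB per disk" referred to above. -->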
  <property>
    <name>dfs.datanode.failed.volumes.tolerated</name>
    <value>1</value>
    <description>The number of volumes that are allowed to fail before a datanode stops offering service. By default any volume failure will cause a datanode to shutdown.</description>
  </property>
  <property>
    <name>dfs.client.read.shortcircuit.streams.cache.size</name>
    <value>1000</value>
  </property>
  <property>
    <name>dfs.client.read.shortcircuit.streams.cache.expiry.ms</name>
    <value>10000</value>
  </property>
  <property>
    <name>dfs.client.read.shortcircuit</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.domain.socket.path</name>
    <value>/var/run/hadoop-hdfs/dn_socket</value>
  </property>
  <property>
    <name>dfs.client.read.shortcircuit.skip.checksum</name>
    <value>false</value>
  </property>
  <property>
    <name>dfs.blocksize</name>
    <value>134217728</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <property>
    <name>dfs.namenode.handler.count</name>
    <value>200</value>
  </property>
  <property>
    <name>dfs.datanode.handler.count</name>
    <value>40</value>
  </property>
  <property>
     <name>dfs.webhdfs.enabled</name>
     <value>true</value>
  </property>
  <property>
     <name>dfs.namenode.datanode.registration.ip-hostname-check</name>
     <value>false</value>
  </property>
</configuration>
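
Two operational notes worth scripting against this file, sketched under the assumption that DataNodes run as user hdfs in group hadoop (the VECS02999 hostname is a placeholder): dfs.domain.socket.path requires its parent directory to exist on every DataNode before short-circuit reads work, and dfs.hosts.exclude drives decommissioning.

# On every DataNode: create the short-circuit read socket directory
mkdir -p /var/run/hadoop-hdfs
chown hdfs:hadoop /var/run/hadoop-hdfs    # owner/group assumed from this deployment

# Decommission a DataNode: append its hostname to the exclude list, then refresh
echo "VECS02999" >> /app/hadoop/etc/hadoop/exclude.list    # placeholder hostname
hdfs dfsadmin -refreshNodes
hdfs dfsadmin -report      # the node should report "Decommission in progress"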

yarn-site.xml

<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>
  <!-- Site specific YARN configuration properties -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
    <value>org.apache.hadoop.mapred.ShuffleHandler</value>
  </property>
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <property>
    <description>Where to aggregate logs to.</description>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>hdfs://flashHadoopDev/tmp/logs</value>
  </property>
  <property>
    <name>yarn.nodemanager.remote-app-log-dir-suffix</name>
    <value>logs</value>
  </property>

  <property>
    <description>Classpath for typical applications.</description>
    <name>yarn.application.classpath</name>
    <value>
      $HADOOP_CONF_DIR,
      $HADOOP_COMMON_HOME/*,$HADOOP_COMMON_HOME/lib/*,
      $HADOOP_HDFS_HOME/*,$HADOOP_HDFS_HOME/lib/*,
      $HADOOP_MAPRED_HOME/*,$HADOOP_MAPRED_HOME/lib/*,
      $HADOOP_YARN_HOME/*,$HADOOP_YARN_HOME/lib/*,
      $HADOOP_COMMON_HOME/share/hadoop/common/*,
      $HADOOP_COMMON_HOME/share/hadoop/common/lib/*,
      $HADOOP_COMMON_HOME/share/hadoop/hdfs/*,
      $HADOOP_COMMON_HOME/share/hadoop/hdfs/lib/*,
      $HADOOP_COMMON_HOME/share/hadoop/mapreduce/*,
      $HADOOP_COMMON_HOME/share/hadoop/mapreduce/lib/*,
      $HADOOP_COMMON_HOME/share/hadoop/yarn/*,
      $HADOOP_COMMON_HOME/share/hadoop/yarn/lib/*
     </value>
  </property>
  <!-- resourcemanager config -->
  <property>
    <name>yarn.resourcemanager.connect.retry-interval.ms</name>
    <value>2000</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>FLASH_YARN_DEV</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>VECS02907</value>
  </property>
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>VECS02908</value>
  </property>


<!-- CapacityScheduler -->
  <property>
      <name>yarn.resourcemanager.scheduler.class</name>
      <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
  </property>

<!-- CapacityScheduler End-->
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name>
    <value>5000</value>
  </property>
  <!-- File listing NodeManagers to be decommissioned -->
  <property>
    <name>yarn.resourcemanager.nodes.exclude-path</name>
    <value>/app/hadoop/etc/hadoop/yarn.exclude</value>
    <final>true</final>
  </property>
  <!-- ZKRMStateStore config -->
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>VECS02908:2181,VECS02907:2181,VECS02909:2181</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk.state-store.address</name>
    <value>VECS02908:2181,VECS02907:2181,VECS02909:2181</value>
  </property>
  <!-- Applications manager interface: clients submit application requests to the RM here -->
  <property>
    <name>yarn.resourcemanager.address.rm1</name>
    <value>VECS02907:23140</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm2</name>
    <value>VECS02908:23140</value>
  </property>
  <!-- Scheduler interface: ApplicationMasters request resources from the RM here -->
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm1</name>
    <value>VECS02907:23130</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm2</name>
    <value>VECS02908:23130</value>
  </property>
  <!-- RM admin interface -->
  <property>
    <name>yarn.resourcemanager.admin.address.rm1</name>
    <value>VECS02907:23141</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm2</name>
    <value>VECS02908:23141</value>
  </property>
  <!-- RM resource-tracker interface: NodeManagers send heartbeats and receive commands here -->
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
    <value>VECS02907:23125</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
    <value>VECS02908:23125</value>
  </property>
  <!-- RM web application interface -->
  <property>
    <name>yarn.resourcemanager.webapp.address.rm1</name>
    <value>VECS02907:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.address.rm2</name>
    <value>VECS02908:8088</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.https.address.rm1</name>
    <value>VECS02907:23189</value>
  </property>
  <property>
    <name>yarn.resourcemanager.webapp.https.address.rm2</name>
    <value>VECS02908:23189</value>
  </property>
  <property>
    <name>yarn.log.server.url</name>
    <value>http://VECS02909:19888/jobhistory/logs</value>
  </property>
  <property>
    <name>yarn.web-proxy.address</name>
    <value>VECS02907:54315</value>
  </property>
  <!-- Node Manager Configs -->
  <property>
    <description>Address where the localizer IPC is.</description>
    <name>yarn.nodemanager.localizer.address</name>
    <value>0.0.0.0:23344</value>
  </property>
  <property>
    <description>NM Webapp address.</description>
    <name>yarn.nodemanager.webapp.address</name>
    <value>0.0.0.0:8042</value>
  </property>
  <property>
    <name>yarn.nodemanager.local-dirs</name>
    <value>file:///data8/HDATA/yarn/local,
           file:///data7/HDATA/yarn/local,
           file:///data6/HDATA/yarn/local,
           file:///data5/HDATA/yarn/local,
           file:///data4/HDATA/yarn/local,
           file:///data3/HDATA/yarn/local,
           file:///data2/HDATA/yarn/local,
           file:///data1/HDATA/yarn/local</value>
  </property>
  <property>
    <name>yarn.nodemanager.log-dirs</name>
    <value>file:///data8/HDATA/yarn/logs,
           file:///data7/HDATA/yarn/logs,
           file:///data6/HDATA/yarn/logs,
           file:///data5/HDATA/yarn/logs,
           file:///data4/HDATA/yarn/logs,
           file:///data3/HDATA/yarn/logs,
           file:///data2/HDATA/yarn/logs,
           file:///data1/HDATA/yarn/logs</value>
  </property>
  <property>
    <name>yarn.nodemanager.delete.debug-delay-sec</name>
    <value>1200</value>
  </property>
  <property>
    <name>mapreduce.shuffle.port</name>
    <value>23080</value>
  </property>
  <property>
    <name>yarn.resourcemanager.work-preserving-recovery.enabled</name>
    <value>true</value>
  </property>
  <!-- tuning -->
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>20480</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>8</value>
  </property>
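  <!-- With 20480 MB per NM and the 2048 MB minimum allocation configured below,
       each NodeManager hosts at most 10 minimum-size containers
       (or 2 containers at the 8192 MB maximum). -->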
  <!-- tuning yarn container -->
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>2048</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>8192</value>
  </property>
  <property>
    <name>yarn.scheduler.increment-allocation-mb</name>
    <value>512</value>
  </property>
  <property>
    <name>yarn.scheduler.fair.allow-undeclared-pools</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.nodemanager.pmem-check-enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-pmem-ratio</name>
    <value>2.1</value>
    <description>Ratio between virtual memory to physical memory when setting memory limits for containers</description>
  </property>
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>1209600</value>
  </property>
<!-- Newly added features: YARN node labels -->
  <property>
    <name>yarn.node-labels.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.node-labels.fs-store.root-dir</name>
    <value>hdfs://flashHadoopDev/yarn/yarn-node-labels/</value>
  </property>
<!-- timeline server -->
  <property>
    <name>yarn.timeline-service.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.system-metrics-publisher.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.timeline-service.generic-application-history.enabled</name>
    <value>true</value>
  </property>
</configuration>
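
Once both ResourceManagers are up, the HA state and the node-label store configured above can be exercised with the stock yarn CLI; a sketch, where the highmem label and the VECS02999 host are placeholders:

# Confirm RM HA state
yarn rmadmin -getServiceState rm1    # prints "active" or "standby"
yarn rmadmin -getServiceState rm2
# Node labels are enabled above; define a label before any queue references it
yarn rmadmin -addToClusterNodeLabels "highmem"           # placeholder label
yarn rmadmin -replaceLabelsOnNode "VECS02999=highmem"    # placeholder NM host
yarn node -list -all                 # verify NodeManagers have registered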

mapred-site.xml

<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>VECS02909:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>VECS02909:19888</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.staging-dir</name>
    <value>/user</value>
  </property>

  <!-- tuning  mapreduce -->
  <property>
    <name>mapreduce.map.memory.mb</name>
    <value>2048</value>
  </property>
  <property>
    <name>mapreduce.map.java.opts</name>
    <value>-Xmx1536m -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+UseCMSCompactAtFullCollection -XX:CMSFullGCsBeforeCompaction=15 -XX:CMSInitiatingOccupancyFraction=70 -Dfile.encoding=UTF-8</value>
  </property>
  <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>6144</value>
  </property>
  <property>
    <name>mapreduce.reduce.java.opts</name>
    <value>-Xmx4608m -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+UseCMSCompactAtFullCollection -XX:CMSFullGCsBeforeCompaction=15 -XX:CMSInitiatingOccupancyFraction=70 -Dfile.encoding=UTF-8</value>
  </property>
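  <!-- In both cases the JVM heap (-Xmx) is ~75% of the container size
       (1536/2048 for maps, 4608/6144 for reduces), leaving headroom for
       non-heap JVM memory within the container. -->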
  <property>
    <name>mapreduce.map.cpu.vcores</name>
    <value>1</value>
  </property>
  <property>
    <name>mapreduce.reduce.cpu.vcores</name>
    <value>2</value>
  </property>
  <property>
    <name>mapreduce.cluster.local.dir</name>
    <value>file:///data8/HDATA/mapred/local,
           file:///data7/HDATA/mapred/local,
           file:///data6/HDATA/mapred/local,
           file:///data5/HDATA/mapred/local,
           file:///data4/HDATA/mapred/local,
           file:///data3/HDATA/mapred/local,
           file:///data2/HDATA/mapred/local,
           file:///data1/HDATA/mapred/local</value>
  </property>
<!-- Map, shuffle, and reduce tuning -->
  <property>
      <name>mapreduce.task.io.sort.mb</name>
      <value>300</value>
  </property>
  <!-- io.sort.mb: 30 * 10 = 300 MB sort buffer -->
  <property>
    <name>mapreduce.jobhistory.max-age-ms</name>
    <value>1296000000</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.joblist.cache.size</name>
    <value>200000</value>
  </property>
  <property>
    <name>mapreduce.input.fileinputformat.input.dir.recursive</name>
    <value>true</value>
  </property>

</configuration>
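
A minimal smoke test for these settings, assuming a standard tarball layout under $HADOOP_HOME (adjust the examples-jar path to your install):

# Run the bundled pi estimator to exercise the map/reduce memory settings above
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar pi 10 1000
# The finished job should then be visible in the JobHistory UI at http://VECS02909:19888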

Reposted from: https://www.cnblogs.com/hit-zb/p/10650141.html
