Hadoop configuration notes

core-site.xml

<configuration>
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://earth</value>
    <final>true</final>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/data1/tmp-security</value>
    <final>true</final>
  </property>
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>hadoop-btzk0001.eniot.io:2181,hadoop-btzk0002.eniot.io:2181,hadoop-btzk0003.eniot.io:2181</value>
  </property>
  <property>
    <name>ha.failover-controller.active-standby-elector.zk.op.retries</name>
    <value>120</value>
  </property>
  
  
  
  <property>
    <!-- Interval between "du" runs on the data disks; du has a noticeable impact on disk I/O -->
    <name>fs.du.interval</name>
    <value>1200000</value>
  </property>
  <property>
    <name>fs.df.interval</name>
    <value>60000</value>
  </property>
  
  
  <property>
  <!-- Enables the HDFS trash: deleted files are moved to the trash instead of being removed
       immediately. The value is how long trash contents are kept before being purged, in
       minutes (default: 360; set here to 1440 = 24 hours). -->
    <name>fs.trash.interval</name>
    <value>1440</value>
    <final>true</final>
  </property>
  
  <property>
  <!-- Buffer size that SequenceFiles may use during reads and writes (default: 131072) -->
    <name>io.file.buffer.size</name>
    <value>65536</value>
  </property>
  <property>
    <name>io.compression.codecs</name>
    <value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec</value>
  </property>
  <property>
    <name>io.compression.codec.lzo.class</name>
    <value>com.hadoop.compression.lzo.LzoCodec</value>
  </property>
  <property>
    <name>lzo.text.input.format.ignore.nonlzo</name>
    <value>false</value>
  </property>
  <property>
    <name>hadoop.ssl.hostname.verifier</name>
    <value>ALLOW_ALL</value>
  </property>
</configuration>
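
As a quick sanity check, these core-site.xml values can be read back through Hadoop's Configuration API. A minimal sketch, assuming the files live under /usr/local/hadoop/hadoop-release/etc/hadoop (the path used elsewhere in these configs) and that a valid Kerberos ticket is present, since the cluster is secured:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class CoreSiteCheck {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Assumed config location; adjust to the local install.
    conf.addResource(new Path("/usr/local/hadoop/hadoop-release/etc/hadoop/core-site.xml"));
    conf.addResource(new Path("/usr/local/hadoop/hadoop-release/etc/hadoop/hdfs-site.xml"));

    System.out.println("fs.defaultFS        = " + conf.get("fs.defaultFS"));        // hdfs://earth
    System.out.println("fs.trash.interval   = " + conf.get("fs.trash.interval"));   // 1440 min = 24 h
    System.out.println("io.file.buffer.size = " + conf.getInt("io.file.buffer.size", 4096));

    // Resolving the "earth" HA nameservice also needs hdfs-site.xml (loaded above)
    // and, on this cluster, a Kerberos ticket (kinit) in the calling environment.
    FileSystem fs = FileSystem.get(conf);
    System.out.println("connected to " + fs.getUri());
  }
}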

hdfs-site.xml

<configuration>
  <!-- HDFS Common -->
  <property>
   <!-- Kerberos is enabled, so browsing HDFS directories from the web UI would require a
        Kerberos-configured client; WebHDFS is therefore disabled -->
    <name>dfs.webhdfs.enabled</name>
    <value>false</value>
  </property>
  
  
  
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <property>
    <name>dfs.nameservices</name>
    <value>earth</value>
  </property>
  <property>
    <name>dfs.ha.namenodes.earth</name>
    <value>nn1,nn2</value>
  </property>
  <property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence
      shell(/bin/true)</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/hdfs/.ssh/id_rsa</value>
  </property>
  <property>
    <name>dfs.ha.fencing.ssh.connect-timeout</name>
    <value>30000</value>
  </property>
  <property>
    <name>dfs.permissions.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.permissions.superusergroup</name>
    <value>hadoop</value>
  </property>
  <property>
    <name>dfs.cluster.administrators</name>
    <value>hadoop</value>
  </property>
  <property>
    <name>dfs.data.transfer.protection</name>
    <value>integrity</value>
  </property>

  <!-- DFS Block -->
  
  <property>
    <!-- Delay the first block report so that DataNodes restarted together do not all report at
         once (default: 0). After a DataNode starts, its first full block report is sent at a
         random time initTime in (0, dfs.blockreport.initialDelay); from initTime (which differs
         per DataNode) it then reports all of its blocks every dfs.blockreport.intervalMsec.
         Without this jitter, every DataNode would report the moment it starts, sending a flood
         of data to the NameNode and congesting it. (A small sketch after this file illustrates
         the scheduling.) -->
    <name>dfs.blockreport.initialDelay</name>
    <value>180</value>
  </property>
  
  
  
  <property>
  <!-- Interval at which a DataNode reports all of its blocks to the NameNode
       (default: 21600000 ms = 6 hours) -->
    <name>dfs.blockreport.intervalMsec</name>
    <value>21600000</value>
  </property>
  
  
  <property>
    <!-- Block reports are split per volume only when the DataNode holds more than this many
         blocks (1,000,000); consider lowering this value somewhat -->
    <name>dfs.blockreport.split.threshold</name>
    <value>1000000</value>
  </property>
  <property>
    <!-- Rate limit for the block scanner (default: 1 MB/s). Check that scanning a whole disk
         does not take too long, but setting this too high also hurts regular I/O -->
    <name>dfs.block.scanner.volume.bytes.per.second</name>
    <value>1048576</value>
  </property>

  <!-- NameNode -->
  <property>
  <!-- Space to keep free on the NameNode's storage volumes so HDFS does not misbehave when the
       disk fills up; systems typically keep about 5% of the disk free for the OS -->
    <name>dfs.namenode.resource.du.reserved</name>
    <value>100G</value>
  </property>
  <property>
  <!-- When true, the NameNode tries to restore any previously failed dfs.namenode.name.dir;
       failed directories are retried during checkpoints -->
    <name>dfs.namenode.name.dir.restore</name>
    <value>false</value>
  </property>
  <property>
    <!-- Number of un-checkpointed transactions that forces a checkpoint regardless of
         dfs.namenode.checkpoint.period; set it higher to avoid frequent checkpoint transfers
         (default: 1000000) -->
    <name>dfs.namenode.checkpoint.txns</name>
    <value>10000000</value>
  </property>


  <property>
    <name>dfs.namenode.rpc-address.earth.nn1</name>
    <value>hadoop-btnn0001.eniot.io:8020</value>
  </property>
  <property>
    <name>dfs.namenode.https-address.earth.nn1</name>
    <value>hadoop-btnn0001.eniot.io:50470</value>
  </property>
  <property>
    <name>dfs.namenode.rpc-address.earth.nn2</name>
    <value>hadoop-btnn0002.eniot.io:8020</value>
  </property>
  <property>
    <name>dfs.namenode.https-address.earth.nn2</name>
    <value>hadoop-btnn0002.eniot.io:50470</value>
  </property>
  <property>
    <name>dfs.namenode.acls.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>/data1/hdfs/hadoopNNData</value>
    <final>true</final>
  </property>
  <property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://hadoop-btnn0001.eniot.io:8485;hadoop-btnn0002.eniot.io:8485;hadoop-btnn0003.eniot.io:8485/earth</value>
  </property>
  <property>
    <name>dfs.namenode.delegation.token.max-lifetime</name>
    <value>31536000000</value>
  </property>
  <property>
    <name>dfs.namenode.delegation.token.renew-interval</name>
    <value>31536000000</value>
  </property>
  <property>
    <name>dfs.namenode.checkpoint.period</name>
    <value>7200</value>
  </property>


  <!-- DataNode -->
  <property>
    <!-- Regular block scan interval, in hours: a positive value scans at that interval, a
         negative value disables scanning, and 0 uses the default of 504 hours (3 weeks) -->
    <name>dfs.datanode.scan.period.hours</name>
    <value>504</value>
  </property>
  <property>
  <!-- Number of server threads on the DataNode -->
    <name>dfs.datanode.handler.count</name>
    <value>10</value>
  </property>
  <property>
    <name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-preference-fraction</name>
    <value>0.75f</value>
  </property>
  <property>
  <!-- How many bytes of difference in free space DataNode volumes may have before they are
       considered imbalanced; if all volumes are within this range they are treated as balanced
       and block placement is done purely round-robin -->
    <name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-threshold</name>
    <value>10737418240</value>
  </property>
  <property>
    <name>dfs.datanode.address</name>
    <value>0.0.0.0:50010</value>
  </property>
  <property>
    <name>dfs.datanode.http.address</name>
    <value>0.0.0.0:50075</value>
  </property>
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>/data1/dfs/data</value>
    <final>true</final>
  </property>
  <property>
    <!-- Bandwidth the balancer may use when moving data; the default is 1 MB/s, and it is
         commonly raised to around 10 MB/s or more -->
    <name>dfs.datanode.balance.bandwidthPerSec</name>
    <value>50485760</value>
  </property>
  <property>
  <!-- Maximum concurrent block moves per DataNode during balancing, to speed up the balancer;
       changing it requires a DataNode restart -->
    <name>dfs.datanode.balance.max.concurrent.moves</name>
    <value>25</value>
  </property>
  <property>
  <!-- Non-DFS space, in bytes, to reserve on each DataNode disk so DFS cannot fill the volume
       completely (default: 0) -->
    <name>dfs.datanode.du.reserved</name>
    <value>0</value>
  </property>

  <!-- JournalNode -->
  <property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/data1/hdfs/journal</value>
  </property>

  <!-- Web -->
 
  <property>
    <name>dfs.http.policy</name>
    <value>HTTPS_ONLY</value>
  </property>
  <property>
    <name>dfs.https.port</name>
    <value>50470</value>
  </property>
  <!-- Other -->

  <property>
    <name>dfs.image.transfer.timeout</name>
    <value>1800000</value>
  </property>
  <property>
  <!-- Bandwidth limit for fsimage transfers; letting them use all bandwidth would hurt NameNode
       RPC traffic. Takes effect only after the active NameNode is restarted -->
    <name>dfs.image.transfer.bandwidthPerSec</name>
    <value>5242880</value>
  </property>
  <property>
    <name>dfs.hosts.exclude</name>
    <value>/usr/local/hadoop/hadoop-release/etc/hadoop/hosts-dfs.exclude</value>
  </property>
  <property>
    <name>dfs.block.access.token.enable</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.client.read.shortcircuit</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.domain.socket.path</name>
    <value>/var/lib/hadoop-hdfs/dn_socket</value>
  </property>
  <property>
    <name>dfs.client.failover.proxy.provider.earth</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
  </property>

<!-- kerberos -->
  <property>
    <name>dfs.namenode.keytab.file</name>
    <value>/etc/security/keytab/hdfs.keytab</value>
  </property>
  <property>
    <name>dfs.namenode.kerberos.principal</name>
    <value>hdfs/_HOST@ENIOT.IO</value>
  </property>

  <property>
    <name>dfs.datanode.keytab.file</name>
    <value>/etc/security/keytab/hdfs.keytab</value>
  </property>
  <property>
    <name>dfs.datanode.kerberos.principal</name>
    <value>hdfs/_HOST@ENIOT.IO</value>
  </property>

  <property>
    <name>dfs.journalnode.keytab.file</name>
    <value>/etc/security/keytab/hdfs.keytab</value>
  </property>
  <property>
    <name>dfs.journalnode.kerberos.principal</name>
    <value>hdfs/_HOST@ENIOT.IO</value>
  </property>
  <property>
    <name>dfs.journalnode.kerberos.internal.spnego.principal</name>
    <value>HTTP/_HOST@ENIOT.IO</value>
  </property>

  <property>
    <name>dfs.web.authentication.kerberos.keytab</name>
    <value>/etc/security/keytab/hdfs.keytab</value>
  </property>
  <property>
    <name>dfs.web.authentication.kerberos.principal</name>
    <value>HTTP/_HOST@ENIOT.IO</value>
  </property>

  <property>
    <name>dfs.permissions</name>
    <value>true</value>
  </property>
  <property>
    <name>dfs.namenode.inode.attributes.provider.class</name>
    <value>org.apache.ranger.authorization.hadoop.RangerHdfsAuthorizer</value>
  </property>

  <!-- Increase the JournalNode start-segment / select / write timeouts below (they all default
       to 20 s), e.g. to 120 s. This only guards against short, intermittent network jitter; it
       will not help with a genuinely prolonged network outage. -->
  <property>
    <name>dfs.qjournal.start-segment.timeout.ms</name>
    <value>20000</value>
  </property>
  <property>
    <name>dfs.qjournal.prepare-recovery.timeout.ms</name>
    <value>120000</value>
  </property>
  <property>
    <name>dfs.qjournal.accept-recovery.timeout.ms</name>
    <value>120000</value>
  </property>
  <property>
    <name>dfs.qjournal.finalize-segment.timeout.ms</name>
    <value>120000</value>
  </property>
  <property>
    <name>dfs.qjournal.select-input-streams.timeout.ms</name>
    <value>20000</value>
  </property>
  <property>
    <name>dfs.qjournal.get-journal-state.timeout.ms</name>
    <value>120000</value>
  </property>
  <property>
    <name>dfs.qjournal.new-epoch.timeout.ms</name>
    <value>120000</value>
  </property>
  <property>
    <name>dfs.qjournal.write-txns.timeout.ms</name>
    <value>20000</value>
  </property>
</configuration>
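
The dfs.blockreport.initialDelay comment above describes how full block reports are jittered. The snippet below only illustrates that scheduling arithmetic with the values configured here; it is not the actual DataNode implementation:

import java.util.concurrent.ThreadLocalRandom;

public class BlockReportJitter {
  public static void main(String[] args) {
    long initialDelaySec = 180;       // dfs.blockreport.initialDelay
    long intervalMs = 21600000L;      // dfs.blockreport.intervalMsec (6 hours)

    // Each DataNode picks its own random offset in [0, initialDelay), so that after a mass
    // restart the full block reports do not all hit the NameNode at the same moment.
    long firstReportMs = ThreadLocalRandom.current().nextLong(initialDelaySec * 1000);

    System.out.println("first full block report after ~" + firstReportMs / 1000 + " s");
    for (int n = 1; n <= 3; n++) {
      long t = firstReportMs + n * intervalMs;
      System.out.println("report #" + n + " thereafter at ~" + t / 1000 + " s after startup");
    }
  }
}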

yarn-site.xml

<configuration>
  <property>
    <name>yarn.http.policy</name>
    <value>HTTPS_ONLY</value>
  </property>
  <property>
    <name>yarn.acl.enable</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.admin.acl</name>
    <value>yarn,hadoop</value>
  </property>
  <property>
    <name>yarn.web-proxy.address</name>
    <value>hadoop-btnn0003.eniot.io:8089</value>
  </property>
  <!-- Resource Manager -->
  <property>
    <name>yarn.resourcemanager.zk-state-store.parent-path</name>
    <value>/rmstore_batch</value>
 </property>
  <property>
    <name>yarn.resourcemanager.cluster-id</name>
    <value>yarn-cluster-batch</value>
  </property>
  <property>
    <name>yarn.resourcemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.resourcemanager.ha.rm-ids</name>
    <value>rm1,rm2</value>
  </property>
  <property>
    <name>yarn.resourcemanager.connect.retry-interval.ms</name>
    <value>2000</value>
  </property>
  <property>
    <name>yarn.resourcemanager.am.max-retries</name>
    <value>5</value>
  </property>


  <!-- Zookeeper -->
  <property>
    <name>yarn.resourcemanager.zk-acl</name>
    <value>world:anyone:rwcda</value>
  </property>
  <property>
    <name>yarn.resourcemanager.zk-address</name>
    <value>hadoop-btzk0001.eniot.io:2181,hadoop-btzk0002.eniot.io:2181,hadoop-btzk0003.eniot.io:2181</value>
  </property>
  <property>
    <name>yarn.resourcemanager.store.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
  </property>

  <property>
    <name>yarn.resourcemanager.hostname.rm1</name>
    <value>hadoop-btnn0001.eniot.io</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm1</name>
    <value>hadoop-btnn0001.eniot.io:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm1</name>
    <value>hadoop-btnn0001.eniot.io:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm1</name>
    <value>hadoop-btnn0001.eniot.io:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm1</name>
    <value>hadoop-btnn0001.eniot.io:8033</value>
  </property>

  <property>
    <name>yarn.resourcemanager.webapp.https.address.rm1</name>
    <value>hadoop-btnn0001.eniot.io:8090</value>
  </property>
  
  <property>
    <name>yarn.resourcemanager.hostname.rm2</name>
    <value>hadoop-btnn0002.eniot.io</value>
  </property>
  <property>
    <name>yarn.resourcemanager.scheduler.address.rm2</name>
    <value>hadoop-btnn0002.eniot.io:8030</value>
  </property>
  <property>
    <name>yarn.resourcemanager.resource-tracker.address.rm2</name>
    <value>hadoop-btnn0002.eniot.io:8031</value>
  </property>
  <property>
    <name>yarn.resourcemanager.address.rm2</name>
    <value>hadoop-btnn0002.eniot.io:8032</value>
  </property>
  <property>
    <name>yarn.resourcemanager.admin.address.rm2</name>
    <value>hadoop-btnn0002.eniot.io:8033</value>
  </property>

  <property>
    <name>yarn.resourcemanager.webapp.https.address.rm2</name>
    <value>hadoop-btnn0002.eniot.io:8090</value>
  </property>
  
  

  <property>
    <name>yarn.log.server.url</name>
    <value>https://hadoop-btnn0003.eniot.io:19888/jobhistory/logs</value>
  </property>

  <property>
    <name>yarn.resourcemanager.nodes.exclude-path</name>
    <value>/usr/local/hadoop/hadoop-release/etc/hadoop/hosts-yarn.exclude</value>
  </property>
  <property>
    <name>yarn.resourcemanager.proxy-user-privileges.enabled</name>
    <value>true</value>
  </property>
  
  <!-- NodeManager -->
  <!-- NM SECURITY -->
  <property>
    <name>yarn.nodemanager.linux-container-executor.group</name>
    <value>hadoop</value>
  </property>
  <property>
    <name>yarn.nodemanager.disk-health-checker.min-healthy-disks</name>
    <value>0.25</value>
  </property>
  <property>
    <name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
    <value>95.0</value>
  </property>
  <property>
    <name>yarn.nodemanager.disk-health-checker.min-free-space-per-disk-mb</name>
    <value>0</value>
  </property>
  <property>
    <name>yarn.nodemanager.vmem-check-enabled</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.nodemanager.recovery.enabled</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.nodemanager.linux-container-executor.path</name>
    <value>/usr/local/hadoop/hadoop-release/bin/container-executor</value>
  </property>
  <property>
    <name>yarn.nodemanager.address</name>
    <value>0.0.0.0:45454</value>
  </property>
  <property>
    <name>yarn.nodemanager.local-dirs</name>
    <value>/data1/yarn/local</value>
  </property>
  <property>
    <name>yarn.nodemanager.log-dirs</name>
    <value>/data1/yarn/userlogs</value>
  </property>
  <property>
    <name>yarn.nodemanager.log.retain-seconds</name>
    <value>10800</value>
  </property>
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.cpu-vcores</name>
    <value>4</value>
  </property>
  <property>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>28942</value>
  </property>
  
  <!-- Scheduler -->
  <property>
    <name>yarn.resourcemanager.scheduler.class</name>
    <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
  </property>
  <property>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>20480</value>
  </property>
  <property>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>128</value>
  </property>
  <property>
    <name>yarn.scheduler.increment-allocation-mb</name>
    <value>128</value>
  </property>
  <property>
    <name>yarn.scheduler.fair.allocation.file</name>
    <value>/usr/local/hadoop/hadoop-release/etc/hadoop/fair-scheduler.xml</value>
  </property>
  <property>
   <!-- Set on the RM: maximum number of containers assigned to a single node in one
        scheduling pass -->
    <name>yarn.scheduler.fair.max.assign</name>
    <value>10</value>
  </property>
  <property>
    <name>yarn.scheduler.fair.user-as-default-queue</name>
    <value>false</value>
  </property>
  <property>
    <name>yarn.scheduler.fair.allow-undeclared-pools</name>
    <value>false</value>
  </property>
  
  
  <!-- Log Aggregation -->
  <property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>
  <property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>604800</value>
  </property>
  <property>
    <name>yarn.log-aggregation.retain-check-interval-seconds</name>
    <value>86400</value>
  </property>
  <property>
    <name>yarn.nodemanager.remote-app-log-dir</name>
    <value>/mr-history</value>
  </property>
  <property>
    <name>yarn.nodemanager.remote-app-log-dir-suffix</name>
    <value>logs</value>
  </property>
  <property>
     <name>yarn.nodemanager.localizer.cache.cleanup.interval-ms</name>
     <value>600000</value>
  </property>
  <property>
     <name>yarn.nodemanager.log-aggregator.on-fail.remain-log-in-sec</name>
     <value>259200</value>
  </property>

<!-- kerberos -->
  <property>
    <name>yarn.nodemanager.principal</name>
    <value>yarn/_HOST@ENIOT.IO</value>
  </property>
  <property>
    <name>yarn.nodemanager.keytab</name>
    <value>/etc/security/keytab/yarn.keytab</value>
  </property>
  <property>
    <name>yarn.resourcemanager.keytab</name>
    <value>/etc/security/keytab/yarn.keytab</value>
  </property>
  <property>
    <name>yarn.resourcemanager.principal</name>
    <value>yarn/_HOST@ENIOT.IO</value>
  </property>
  <property>
    <name>yarn.web-proxy.keytab</name>
    <value>/etc/security/keytab/yarn.keytab</value>
  </property>
  <property>
    <name>yarn.web-proxy.principal</name>
    <value>HTTP/_HOST@ENIOT.IO</value>
  </property>

<property>
  <name>yarn.nodemanager.container-executor.class</name>
  <value>org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor</value>
</property>

<!-- cgroup 
<property>
   <name>yarn.nodemanager.linux-container-executor.cgroups.strict-resource-usage</name>
   <value>True</value>
</property>

<property>
   <name>yarn.nodemanager.resource.percentage-physical-cpu-limit</name>
   <value>95</value>
</property>

<property>
   <name>yarn.nodemanager.resource.cpu.enable</name>
   <value>True</value>
</property>


<property>
  <name>yarn.nodemanager.linux-container-executor.resources-handler.class</name>
  <value>org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler</value>
</property>

<property>
  <name>yarn.nodemanager.linux-container-executor.cgroups.hierarchy</name>
  <value>/hadoop-yarn</value>
</property>

<property>
  <name>yarn.nodemanager.linux-container-executor.cgroups.mount-path</name>
  <value>/sys/fs/cgroup</value>
</property>

<property>
  <name>yarn.nodemanager.linux-container-executor.cgroups.mount</name>
  <value>True</value>
</property>
-->
</configuration>
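
Under the FairScheduler settings above, a container request is raised to at least yarn.scheduler.minimum-allocation-mb, rounded up to a multiple of yarn.scheduler.increment-allocation-mb, and capped at yarn.scheduler.maximum-allocation-mb. A rough illustration of that normalization (not the scheduler's own code):

public class ContainerSizeNormalization {
  static final int MIN_MB = 128;    // yarn.scheduler.minimum-allocation-mb
  static final int INC_MB = 128;    // yarn.scheduler.increment-allocation-mb
  static final int MAX_MB = 20480;  // yarn.scheduler.maximum-allocation-mb

  static int normalize(int requestedMb) {
    int mb = Math.max(requestedMb, MIN_MB);
    mb = (int) (Math.ceil(mb / (double) INC_MB) * INC_MB);  // round up to the increment
    return Math.min(mb, MAX_MB);
  }

  public static void main(String[] args) {
    System.out.println(normalize(1000));   // 1024
    System.out.println(normalize(2048));   // 2048
    System.out.println(normalize(30000));  // 20480 (capped at the maximum)
  }
}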

mapred-site.xml

<configuration>
  <property>
    <name>yarn.app.mapreduce.am.resource.mb</name>
    <value>2048</value>
  </property>
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <property>
    <name>mapreduce.cluster.local.dir</name>
    <value>/data1/yarn/mapred/data</value>
  </property>
  <property>
    <name>mapreduce.task.io.sort.mb</name>
    <value>300</value>
  </property>
  <property>
    <name>mapred.child.env</name>
    <value>LD_LIBRARY_PATH=/usr/local/hadoop/hadoop-release/lib/libexec</value>
  </property>
  <property>
    <name>mapred.child.java.opts</name>
    <value>-Xmx3072m -XX:-UseGCOverheadLimit</value>
  </property>
  <property>
    <name>mapreduce.tasktracker.http.address</name>
    <value>0.0.0.0:50060</value>
  </property>
  <!-- JobHistory -->
  <property>
    <!-- Cap the size of each container's task log (set to 50 MB here). Be sure to also set the
         corresponding log.backups properties, otherwise logs are buffered in memory -->
    <name>mapreduce.task.userlog.limit.kb</name>
    <value>51200</value>
  </property>
  <!-- 
  <property>
    <name>yarn.app.mapreduce.am.container.log.limit.kb</name>
    <value>51200</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.container.log.backups</name>
    <value>20</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.shuffle.log.limit.kb</name>
    <value>51200</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.shuffle.log.backups</name>
    <value>10</value>
  </property> 
  -->
  <property>
    <name>mapreduce.reduce.shuffle.memory.limit.percent</name>
    <value>0.25</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.task.container.log.backups</name>
    <value>1</value>
  </property>
  <property>
    <name>yarn.app.mapreduce.am.staging-dir</name>
    <value>/tmp/hadoop-yarn/staging</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.done-dir</name>
    <value>${yarn.app.mapreduce.am.staging-dir}/history/done</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>hadoop-btnn0003.eniot.io:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.cleaner.enable</name>
    <value>true</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.max-age-ms</name>
    <value>604800000</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.cleaner.interval-ms</name>
    <value>86400000</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.move.interval-ms</name>
    <value>180000</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.http.policy</name>
    <value>HTTPS_ONLY</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.https.address</name>
    <value>hadoop-btnn0003.eniot.io:19888</value>
  </property>
  
  <!-- Compression -->
  <property>
    <name>mapreduce.map.speculative</name>
    <value>false</value>
  </property>
  <property>
    <name>mapreduce.reduce.speculative</name>
    <value>false</value>
  </property>
  <property>
    <name>mapreduce.map.output.compress</name>
    <value>true</value>
  </property>
  <property>
    <name>mapreduce.map.output.compress.codec</name>
    <value>com.hadoop.compression.lzo.LzoCodec</value>
  </property>
  <!-- Memory setting -->
  <property>
    <name>mapreduce.map.memory.mb</name>
    <value>2048</value>
  </property>
  <property>
    <name>mapreduce.reduce.memory.mb</name>
    <value>4096</value>
  </property>
  <property>
    <name>mapreduce.map.java.opts</name>
    <value>-Duser.home=. -Djava.net.preferIPv4Stack=true -Xmx1800m -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+UseCMSCompactAtFullCollection -XX:CMSFullGCsBeforeCompaction=15 -XX:CMSInitiatingOccupancyFraction=70</value>
  </property>
  <property>
    <name>mapreduce.reduce.java.opts</name>
    <value>-Duser.home=. -Djava.net.preferIPv4Stack=true -Xmx3600m -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+UseCMSCompactAtFullCollection -XX:CMSFullGCsBeforeCompaction=15 -XX:CMSInitiatingOccupancyFraction=70</value>
  </property>

<!-- kerberos -->
  <property>
    <name>mapreduce.jobhistory.keytab</name>
    <value>/etc/security/keytab/mapred.keytab</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.principal</name>
    <value>mapred/_HOST@ENIOT.IO</value>
  </property>

</configuration>
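
The memory settings above pair each container size with a JVM heap roughly 12% smaller (2048 MB with -Xmx1800m, 4096 MB with -Xmx3600m), leaving headroom for non-heap memory so tasks are not killed by YARN's memory check. A small helper showing that ratio; the 0.88 factor is inferred from the values above, not a Hadoop constant:

public class HeapSizing {
  static final double HEAP_FRACTION = 0.88;  // assumption: 1800/2048 ≈ 3600/4096 ≈ 0.88

  static int heapMbForContainer(int containerMb) {
    return (int) Math.floor(containerMb * HEAP_FRACTION);
  }

  public static void main(String[] args) {
    System.out.println("map container 2048 MB    -> -Xmx" + heapMbForContainer(2048) + "m");  // ~1802, configured 1800
    System.out.println("reduce container 4096 MB -> -Xmx" + heapMbForContainer(4096) + "m");  // ~3604, configured 3600
  }
}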

fair-scheduler.xml

<?xml version="1.0" encoding="UTF-8"?>
<allocations>
  <queue name="streaming_o15682730980551">
    <minResources>0 mb, 0 vcores</minResources>
    <maxResources>0 mb, 0 vcores</maxResources>
    <maxRunningApps>50</maxRunningApps>
    <schedulingMode>fifo</schedulingMode>
    <weight>1.0</weight>
    <minSharePreemptionTimeout>600</minSharePreemptionTimeout>
  </queue>
  <queue name="solar_prod">
    <minResources>32768 mb, 5 vcores</minResources>
    <maxResources>163840 mb, 24 vcores</maxResources>
    <maxRunningApps>50</maxRunningApps>
    <schedulingMode>fifo</schedulingMode>
    <weight>1.0</weight>
    <minSharePreemptionTimeout>600</minSharePreemptionTimeout>
  </queue>
  <queue name="streaming_o15686870430731">
    <minResources>0 mb, 0 vcores</minResources>
    <maxResources>0 mb, 0 vcores</maxResources>
    <maxRunningApps>50</maxRunningApps>
    <schedulingMode>fifo</schedulingMode>
    <weight>1.0</weight>
    <minSharePreemptionTimeout>600</minSharePreemptionTimeout>
  </queue>
  <queue name="integration">
    <minResources>32768 mb, 16 vcores</minResources>
    <maxResources>32768 mb, 16 vcores</maxResources>
    <maxRunningApps>50</maxRunningApps>
    <schedulingMode>fifo</schedulingMode>
    <weight>1.0</weight>
    <minSharePreemptionTimeout>600</minSharePreemptionTimeout>
  </queue>
  <queue name="default">
    <minResources>0 mb, 0 vcores</minResources>
    <maxResources>8192 mb, 4 vcores</maxResources>
    <maxRunningApps>10</maxRunningApps>
    <schedulingMode>fifo</schedulingMode>
    <weight>1.0</weight>
    <minSharePreemptionTimeout>600</minSharePreemptionTimeout>
  </queue>
</allocations>
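
Since yarn.scheduler.fair.allow-undeclared-pools and yarn.scheduler.fair.user-as-default-queue are both false, applications should name one of the queues above explicitly or they land in default. For a MapReduce job this is done through mapreduce.job.queuename; a minimal sketch (the rest of the job setup is elided):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.mapreduce.Job;

public class SubmitToQueue {
  public static void main(String[] args) throws Exception {
    Configuration conf = new Configuration();
    // Must match a queue declared in fair-scheduler.xml, e.g. solar_prod or integration.
    conf.set("mapreduce.job.queuename", "solar_prod");

    Job job = Job.getInstance(conf, "example-job");
    // ... set jar, mapper/reducer classes, input/output paths ...
    // System.exit(job.waitForCompletion(true) ? 0 : 1);
  }
}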

other

  • dfs.datanode.max.transfer.threads: maximum number of threads the DataNode uses for
    transferring data (often raised, e.g. to 16384; the stock default is 4096)

  • dfs.datanode.ipc.address:
    the DataNode's IPC listen address; with port 0 it listens on a random port and reports it
    to the NameNode via heartbeats (default: 8010)

  • dfs.namenode.checkpoint.dir:
    directory where the standby NameNode stores checkpoint
    files, default: /hadoop/hdfs/namesecondary

  • dfs.namenode.stale.datanode.interval:
    how long without a heartbeat before a DataNode is marked stale: if the
    NameNode receives no heartbeat from a DataNode within this interval, the node is marked
    "stale"; do not set it too low (default: 30000 ms)

  • dfs.namenode.safemode.threshold-pct
    the fraction of blocks that must meet the minimum replication requirement, default 0.999f.
    (On startup the NameNode first enters safe mode, i.e. read-only; if DataNodes are missing
    more blocks than this threshold allows, the system stays in safe mode. Setting it to 1 or
    higher effectively keeps HDFS in safe mode permanently.)
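
A quick illustration of that threshold, not NameNode code: with 0.999, the NameNode leaves safe mode only once at least 99.9% of blocks have reached their minimum replication.

public class SafeModeThreshold {
  public static void main(String[] args) {
    double thresholdPct = 0.999;         // dfs.namenode.safemode.threshold-pct
    long totalBlocks = 10_000_000L;      // hypothetical numbers for illustration
    long blocksWithMinReplicas = 9_980_000L;

    double reported = blocksWithMinReplicas / (double) totalBlocks;
    System.out.printf("reported %.4f of blocks, threshold %.3f -> leave safe mode: %b%n",
        reported, thresholdPct, reported >= thresholdPct);  // 0.9980 < 0.999 -> stays in safe mode
  }
}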
