core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://earth</value>
<final>true</final>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/data1/tmp-security</value>
<final>true</final>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>hadoop-btzk0001.eniot.io:2181,hadoop-btzk0002.eniot.io:2181,hadoop-btzk0003.eniot.io:2181</value>
</property>
<property>
<name>ha.failover-controller.active-standby-elector.zk.op.retries</name>
<value>120</value>
</property>
<property>
<!-- 磁盘du间隔,du对磁盘IO影响比较大 -->
<name>fs.du.interval</name>
<value>1200000</value>
</property>
<property>
<name>fs.df.interval</name>
<value>60000</value>
</property>
<property>
<!-- 这个是开启hdfs文件删除自动转移到垃圾箱的选项,值为垃圾箱文件清除时间,单位是分钟,默认:360-->
<name>fs.trash.interval</name>
<value>1440</value>
<final>true</final>
</property>
<property>
<!-- SequenceFiles在读写中可以使用的缓存大小,默认设置:131072-->
<name>io.file.buffer.size</name>
<value>65536</value>
</property>
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,com.hadoop.compression.lzo.LzoCodec,com.hadoop.compression.lzo.LzopCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.Lz4Codec</value>
</property>
<property>
<name>io.compression.codec.lzo.class</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
<property>
<name>lzo.text.input.format.ignore.nonlzo</name>
<value>false</value>
</property>
<property>
<name>hadoop.ssl.hostname.verifier</name>
<value>ALLOW_ALL</value>
</property>
</configuration>
hdfs-site.xml
<configuration>
<!-- HDFS Common -->
<property>
<!-- 启用了keberos,页面上查看hdfs目录需要配置kerberos客户端 -->
<name>dfs.webhdfs.enabled</name>
<value>false</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.nameservices</name>
<value>earth</value>
</property>
<property>
<name>dfs.ha.namenodes.earth</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence
shell(/bin/true)</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hdfs/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions.superusergroup</name>
<value>hadoop</value>
</property>
<property>
<name>dfs.cluster.administrators</name>
<value>hadoop</value>
</property>
<property>
<name>dfs.data.transfer.protection</name>
<value>integrity</value>
</property>
<!-- DFS Block -->
<property>
<!-- 延迟blockreport,避免datanode一起重启时集中汇报。 初始值为0
说明:dn启动后第一次报告自己的block信息的时间是在(0,$(dfs.blockreport.initialDelay ))之间的一个随机数 initTime,
然后从initTime(不同datanode上的这个随机数肯定是不一样的)开始每隔dfs.blockreport.intervalMsec 时间,
该datanode会向namenode报告自己的所有block的信息,如果没有initTime,所有datanode都从启动的那一刻开始发,这样会造成大量数据发往nn,造成堵塞 -->
<name>dfs.blockreport.initialDelay</name>
<value>180</value>
</property>
<property>
<!--控制DN定期将当前该结点上所有的BLOCK信息报告给NN的时间间隔,默认21600000ms = 1小时-->
<name>dfs.blockreport.intervalMsec</name>
<value>21600000</value>
</property>
<property>
<!-- 即当DataNode本地的Block个数超过1,000,000时才会分盘进行汇报,建议将该参数适当调小 -->
<name>dfs.blockreport.split.threshold</name>
<value>1000000</value>
</property>
<property>
<!-- 默认值1MB,磁盘扫描的限速,要注意看看扫描一个磁盘会不会太慢,但设的太高也会影响IO -->
<name>dfs.block.scanner.volume.bytes.per.second</name>
<value>1048576</value>
</property>
<!-- NameNode -->
<property>
<!--预留磁盘的一部分空间给操作系统用,这个参数主要是为了防止磁盘空间被写满导致的HDFS异常。通常系统默认保留5%的磁盘空间给操作系统用。 -->
<name>dfs.namenode.resource.du.reserved</name>
<!-- NOTE(review): in Hadoop 2.x this key is parsed as a plain long number of bytes; confirm this version accepts the "G" unit suffix, otherwise use 107374182400 -->
<value>100G</value>
</property>
<property>
<!--设置为true来启动namenode以尝试恢复一个以前任何失败的dfs.namenode.name.dir,当启用时,在检查点期间,尝试恢复任何失败的目录-->
<name>dfs.namenode.name.dir.restore</name>
<value>false</value>
</property>
<property>
<!-- 查询未检查的检查点事务的执行时间间隔,单位为秒。设置大一些,避免频繁的checkpoint传输, 默认值1000000 -->
<name>dfs.namenode.checkpoint.txns</name>
<value>10000000</value>
</property>
<property>
<name>dfs.namenode.rpc-address.earth.nn1</name>
<value>hadoop-btnn0001.eniot.io:8020</value>
</property> <property>
<name>dfs.namenode.https-address.earth.nn1</name>
<value>hadoop-btnn0001.eniot.io:50470</value>
</property>
<property>
<name>dfs.namenode.rpc-address.earth.nn2</name>
<value>hadoop-btnn0002.eniot.io:8020</value>
</property> <property>
<name>dfs.namenode.https-address.earth.nn2</name>
<value>hadoop-btnn0002.eniot.io:50470</value>
</property>
<property>
<name>dfs.namenode.acls.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/data1/hdfs/hadoopNNData</value>
<final>true</final>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://hadoop-btnn0001.eniot.io:8485;hadoop-btnn0002.eniot.io:8485;hadoop-btnn0003.eniot.io:8485/earth</value>
</property>
<property>
<name>dfs.namenode.delegation.token.max-lifetime</name>
<value>31536000000</value>
</property>
<property>
<name>dfs.namenode.delegation.token.renew-interval</name>
<value>31536000000</value>
</property>
<property>
<name>dfs.namenode.checkpoint.period</name>
<value>7200</value>
</property>
<!-- DataNode -->
<property>
<!-- 常规 磁盘扫描间隔, 当为正值时,datanode将按照设定时间间隔进行块扫描。当为负值时,则禁止块扫描。当为0时,则使用默认的504小时(3周)进行定期扫描。 -->
<name>dfs.datanode.scan.period.hours</name>
<value>504</value>
</property>
<property>
<!--datanode的服务器线程数-->
<name>dfs.datanode.handler.count</name>
<value>10</value>
</property>
<property>
<name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-preference-fraction</name>
<value>0.75f</value>
</property>
<property>
<!--该属性控制datanode卷在被认为不平衡之前允许在空闲磁盘空间上有多少不同字节。如果所有卷的自由空间都在这一范围内,则卷将被认为是平衡的,并且块分配将在纯循环的基础上完成。-->
<name>dfs.datanode.available-space-volume-choosing-policy.balanced-space-threshold</name>
<value>10737418240</value>
</property>
<property>
<name>dfs.datanode.address</name>
<value>0.0.0.0:50010</value>
</property>
<property>
<name>dfs.datanode.http.address</name>
<value>0.0.0.0:50075</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/data1/dfs/data</value>
<final>true</final>
</property>
<property>
<!-- 平衡的速度: balancer时,hdfs移动数据的速度,默认值为1M/S的速度。一般情况下设置为10M -->
<name>dfs.datanode.balance.bandwidthPerSec</name>
<value>50485760</value>
</property>
<property>
<!-- 平衡的线程数,用于提高平衡效率, 需要重启DataNode -->
<name>dfs.datanode.balance.max.concurrent.moves</name>
<value>25</value>
</property>
<property>
<!-- 表示在datanode对磁盘写时候,保留多少非dfs的磁盘空间,从而避免dfs将所在的磁盘写满,默认为0-->
<name>dfs.datanode.du.reserved</name>
<value>0</value>
</property>
<!-- JournalNode -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data1/hdfs/journal</value>
</property>
<!-- Web -->
<property>
<name>dfs.http.policy</name>
<value>HTTPS_ONLY</value>
</property>
<property>
<name>dfs.https.port</name>
<value>50470</value>
</property>
<!-- Other -->
<property>
<name>dfs.image.transfer.timeout</name>
<value>1800000</value>
</property>
<property>
<!-- image传输限速,占用所有带宽会影响namenode rpc请求,重启active namenode才生效 -->
<name>dfs.image.transfer.bandwidthPerSec</name>
<value>5242880</value>
</property>
<property>
<name>dfs.hosts.exclude</name>
<value>/usr/local/hadoop/hadoop-release/etc/hadoop/hosts-dfs.exclude</value>
</property>
<property>
<name>dfs.block.access.token.enable</name>
<value>true</value>
</property>
<property>
<name>dfs.client.read.shortcircuit</name>
<value>true</value>
</property>
<property>
<name>dfs.domain.socket.path</name>
<value>/var/lib/hadoop-hdfs/dn_socket</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.earth</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- kerberos -->
<property>
<name>dfs.namenode.keytab.file</name>
<value>/etc/security/keytab/hdfs.keytab</value>
</property>
<property>
<name>dfs.namenode.kerberos.principal</name>
<value>hdfs/_HOST@ENIOT.IO</value>
</property>
<property>
<name>dfs.datanode.keytab.file</name>
<value>/etc/security/keytab/hdfs.keytab</value>
</property>
<property>
<name>dfs.datanode.kerberos.principal</name>
<value>hdfs/_HOST@ENIOT.IO</value>
</property>
<property>
<name>dfs.journalnode.keytab.file</name>
<value>/etc/security/keytab/hdfs.keytab</value>
</property>
<property>
<name>dfs.journalnode.kerberos.principal</name>
<value>hdfs/_HOST@ENIOT.IO</value>
</property>
<property>
<name>dfs.journalnode.kerberos.internal.spnego.principal</name>
<value>HTTP/_HOST@ENIOT.IO</value>
</property>
<property>
<name>dfs.web.authentication.kerberos.keytab</name>
<value>/etc/security/keytab/hdfs.keytab</value>
</property>
<property>
<name>dfs.web.authentication.kerberos.principal</name>
<value>HTTP/_HOST@ENIOT.IO</value>
</property>
<property>
<name>dfs.permissions</name>
<value>true</value>
</property>
<property>
<name>dfs.namenode.inode.attributes.provider.class</name>
<value>org.apache.ranger.authorization.hadoop.RangerHdfsAuthorizer</value>
</property>
<!--增大 journalnode start-segment、select、write timeout时间,参数如下,默认时间都是20s,
可以适当调大,如:120s(如果真遇到长时间网络问题此配置将依旧无效,此配置仅为避免 短时间、间接性的网络抖动)-->
<property>
<!-- NOTE(review): still the 20s default although the comment above says these were raised to 120s; select-input-streams and write-txns below are also still 20000 — confirm this is intentional -->
<name>dfs.qjournal.start-segment.timeout.ms</name>
<value>20000</value>
</property>
<property>
<name>dfs.qjournal.prepare-recovery.timeout.ms</name>
<value>120000</value>
</property>
<property>
<name>dfs.qjournal.accept-recovery.timeout.ms</name>
<value>120000</value>
</property>
<property>
<name>dfs.qjournal.finalize-segment.timeout.ms</name>
<value>120000</value>
</property>
<property>
<name>dfs.qjournal.select-input-streams.timeout.ms</name>
<value>20000</value>
</property>
<property>
<name>dfs.qjournal.get-journal-state.timeout.ms</name>
<value>120000</value>
</property>
<property>
<name>dfs.qjournal.new-epoch.timeout.ms</name>
<value>120000</value>
</property>
<property>
<name>dfs.qjournal.write-txns.timeout.ms</name>
<value>20000</value>
</property>
</configuration>
yarn-site.xml
<configuration>
<property>
<name>yarn.http.policy</name>
<value>HTTPS_ONLY</value>
</property>
<property>
<name>yarn.acl.enable</name>
<value>true</value>
</property>
<property>
<name>yarn.admin.acl</name>
<value>yarn,hadoop</value>
</property>
<property>
<name>yarn.web-proxy.address</name>
<value>hadoop-btnn0003.eniot.io:8089</value>
</property>
<!-- Resource Manager -->
<property>
<name>yarn.resourcemanager.zk-state-store.parent-path</name>
<value>/rmstore_batch</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>yarn-cluster-batch</value>
</property>
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<name>yarn.resourcemanager.connect.retry-interval.ms</name>
<value>2000</value>
</property>
<property>
<name>yarn.resourcemanager.am.max-retries</name>
<value>5</value>
</property>
<!-- Zookeeper -->
<property>
<name>yarn.resourcemanager.zk-acl</name>
<value>world:anyone:rwcda</value>
</property>
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>hadoop-btzk0001.eniot.io:2181,hadoop-btzk0002.eniot.io:2181,hadoop-btzk0003.eniot.io:2181</value>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>hadoop-btnn0001.eniot.io</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm1</name>
<value>hadoop-btnn0001.eniot.io:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm1</name>
<value>hadoop-btnn0001.eniot.io:8031</value>
</property>
<property>
<name>yarn.resourcemanager.address.rm1</name>
<value>hadoop-btnn0001.eniot.io:8032</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm1</name>
<value>hadoop-btnn0001.eniot.io:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.https.address.rm1</name>
<value>hadoop-btnn0001.eniot.io:8090</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>hadoop-btnn0002.eniot.io</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm2</name>
<value>hadoop-btnn0002.eniot.io:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm2</name>
<value>hadoop-btnn0002.eniot.io:8031</value>
</property>
<property>
<name>yarn.resourcemanager.address.rm2</name>
<value>hadoop-btnn0002.eniot.io:8032</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm2</name>
<value>hadoop-btnn0002.eniot.io:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.https.address.rm2</name>
<value>hadoop-btnn0002.eniot.io:8090</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>https://hadoop-btnn0003.eniot.io:19888/jobhistory/logs</value>
</property>
<property>
<name>yarn.resourcemanager.nodes.exclude-path</name>
<value>/usr/local/hadoop/hadoop-release/etc/hadoop/hosts-yarn.exclude</value>
</property>
<property>
<name>yarn.resourcemanager.proxy-user-privileges.enabled</name>
<value>true</value>
</property>
<!-- NodeManager -->
<!-- NM SECURITY -->
<property>
<name>yarn.nodemanager.linux-container-executor.group</name>
<value>hadoop</value>
</property>
<property>
<name>yarn.nodemanager.disk-health-checker.min-healthy-disks</name>
<value>0.25</value>
</property>
<property>
<name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
<value>95.0</value>
</property>
<property>
<name>yarn.nodemanager.disk-health-checker.min-free-space-per-disk-mb</name>
<value>0</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.nodemanager.linux-container-executor.path</name>
<value>/usr/local/hadoop/hadoop-release/bin/container-executor</value>
</property>
<property>
<name>yarn.nodemanager.address</name>
<value>0.0.0.0:45454</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>/data1/yarn/local</value>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>/data1/yarn/userlogs</value>
</property>
<property>
<name>yarn.nodemanager.log.retain-seconds</name>
<value>10800</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>4</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>28942</value>
</property>
<!-- Scheduler -->
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>20480</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>128</value>
</property>
<property>
<name>yarn.scheduler.increment-allocation-mb</name>
<value>128</value>
</property>
<property>
<name>yarn.scheduler.fair.allocation.file</name>
<value>/usr/local/hadoop/hadoop-release/etc/hadoop/fair-scheduler.xml</value>
</property>
<property>
<!-- 在rm配置,一次分配中,每台机器最大分配任务数 -->
<name>yarn.scheduler.fair.max.assign</name>
<value>10</value>
</property>
<property>
<name>yarn.scheduler.fair.user-as-default-queue</name>
<value>false</value>
</property>
<property>
<name>yarn.scheduler.fair.allow-undeclared-pools</name>
<value>false</value>
</property>
<!-- Log Aggregation -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
<property>
<name>yarn.log-aggregation.retain-check-interval-seconds</name>
<value>86400</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/mr-history</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir-suffix</name>
<value>logs</value>
</property>
<property>
<name>yarn.nodemanager.localizer.cache.cleanup.interval-ms</name>
<value>600000</value>
</property>
<property>
<name>yarn.nodemanager.log-aggregator.on-fail.remain-log-in-sec</name>
<value>259200</value>
</property>
<!-- kerberos -->
<property>
<name>yarn.nodemanager.principal</name>
<value>yarn/_HOST@ENIOT.IO</value>
</property>
<property>
<name>yarn.nodemanager.keytab</name>
<value>/etc/security/keytab/yarn.keytab</value>
</property>
<property>
<name>yarn.resourcemanager.keytab</name>
<value>/etc/security/keytab/yarn.keytab</value>
</property>
<property>
<name>yarn.resourcemanager.principal</name>
<value>yarn/_HOST@ENIOT.IO</value>
</property>
<property>
<name>yarn.web-proxy.keytab</name>
<value>/etc/security/keytab/yarn.keytab</value>
</property>
<property>
<name>yarn.web-proxy.principal</name>
<value>HTTP/_HOST@ENIOT.IO</value>
</property>
<property>
<name>yarn.nodemanager.container-executor.class</name>
<value>org.apache.hadoop.yarn.server.nodemanager.LinuxContainerExecutor</value>
</property>
<!-- cgroup
<property>
<name>yarn.nodemanager.linux-container-executor.cgroups.strict-resource-usage</name>
<value>True</value>
</property>
<property>
<name>yarn.nodemanager.resource.percentage-physical-cpu-limit</name>
<value>95</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu.enable</name>
<value>True</value>
</property>
<property>
<name>yarn.nodemanager.linux-container-executor.resources-handler.class</name>
<value>org.apache.hadoop.yarn.server.nodemanager.util.CgroupsLCEResourcesHandler</value>
</property>
<property>
<name>yarn.nodemanager.linux-container-executor.cgroups.hierarchy</name>
<value>/hadoop-yarn</value>
</property>
<property>
<name>yarn.nodemanager.linux-container-executor.cgroups.mount-path</name>
<value>/sys/fs/cgroup</value>
</property>
<property>
<name>yarn.nodemanager.linux-container-executor.cgroups.mount</name>
<value>True</value>
</property>
-->
</configuration>
mapred-site.xml
<configuration>
<property>
<name>yarn.app.mapreduce.am.resource.mb</name>
<value>2048</value>
</property>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.cluster.local.dir</name>
<value>/data1/yarn/mapred/data</value>
</property>
<property>
<name>mapreduce.task.io.sort.mb</name>
<value>300</value>
</property>
<property>
<name>mapred.child.env</name>
<value>LD_LIBRARY_PATH=/usr/local/hadoop/hadoop-release/lib/libexec</value>
</property>
<property>
<name>mapred.child.java.opts</name>
<value>-Xmx3072m -XX:-UseGCOverheadLimit</value>
</property>
<property>
<name>mapreduce.tasktracker.http.address</name>
<value>0.0.0.0:50060</value>
</property>
<!-- JobHistory -->
<property>
<!-- 限制container输出的日志不要太大,设置为50MB,注意要设置log.backups,不然会使用内存 -->
<name>mapreduce.task.userlog.limit.kb</name>
<value>51200</value>
</property>
<!--
<property>
<name>yarn.app.mapreduce.am.container.log.limit.kb</name>
<value>51200</value>
</property>
<property>
<name>yarn.app.mapreduce.am.container.log.backups</name>
<value>20</value>
</property>
<property>
<name>yarn.app.mapreduce.shuffle.log.limit.kb</name>
<value>51200</value>
</property>
<property>
<name>yarn.app.mapreduce.shuffle.log.backups</name>
<value>10</value>
</property>
-->
<property>
<name>mapreduce.reduce.shuffle.memory.limit.percent</name>
<value>0.25</value>
</property>
<property>
<name>yarn.app.mapreduce.task.container.log.backups</name>
<value>1</value>
</property>
<property>
<name>yarn.app.mapreduce.am.staging-dir</name>
<value>/tmp/hadoop-yarn/staging</value>
</property>
<property>
<name>mapreduce.jobhistory.done-dir</name>
<value>${yarn.app.mapreduce.am.staging-dir}/history/done</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop-btnn0003.eniot.io:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.cleaner.enable</name>
<value>true</value>
</property>
<property>
<name>mapreduce.jobhistory.max-age-ms</name>
<value>604800000</value>
</property>
<property>
<name>mapreduce.jobhistory.cleaner.interval-ms</name>
<value>86400000</value>
</property>
<property>
<name>mapreduce.jobhistory.move.interval-ms</name>
<value>180000</value>
</property>
<property>
<name>mapreduce.jobhistory.http.policy</name>
<value>HTTPS_ONLY</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.https.address</name>
<value>hadoop-btnn0003.eniot.io:19888</value>
</property>
<!-- Compression -->
<property>
<name>mapreduce.map.speculative</name>
<value>false</value>
</property>
<property>
<name>mapreduce.reduce.speculative</name>
<value>false</value>
</property>
<property>
<name>mapreduce.map.output.compress</name>
<value>true</value>
</property>
<property>
<name>mapreduce.map.output.compress.codec</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
<!-- Memory setting -->
<property>
<name>mapreduce.map.memory.mb</name>
<value>2048</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>4096</value>
</property>
<property>
<name>mapreduce.map.java.opts</name>
<value>-Duser.home=. -Djava.net.preferIPv4Stack=true -Xmx1800m -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+UseCMSCompactAtFullCollection -XX:CMSFullGCsBeforeCompaction=15 -XX:CMSInitiatingOccupancyFraction=70</value>
</property>
<property>
<name>mapreduce.reduce.java.opts</name>
<value>-Duser.home=. -Djava.net.preferIPv4Stack=true -Xmx3600m -XX:+UseParNewGC -XX:+UseConcMarkSweepGC -XX:+UseCMSCompactAtFullCollection -XX:CMSFullGCsBeforeCompaction=15 -XX:CMSInitiatingOccupancyFraction=70</value>
</property>
<!-- kerberos -->
<property>
<name>mapreduce.jobhistory.keytab</name>
<value>/etc/security/keytab/mapred.keytab</value>
</property>
<property>
<name>mapreduce.jobhistory.principal</name>
<value>mapred/_HOST@ENIOT.IO</value>
</property>
</configuration>
fair-scheduler.xml
<?xml version="1.0" encoding="UTF-8"?><allocations>
<queue name="streaming_o15682730980551">
<minResources>0 mb, 0 vcores</minResources>
<maxResources>0 mb, 0 vcores</maxResources>
<maxRunningApps>50</maxRunningApps>
<schedulingMode>fifo</schedulingMode>
<weight>1.0</weight>
<minSharePreemptionTimeout>600</minSharePreemptionTimeout>
</queue>
<queue name="solar_prod">
<minResources>32768 mb, 5 vcores</minResources>
<maxResources>163840 mb, 24 vcores</maxResources>
<maxRunningApps>50</maxRunningApps>
<schedulingMode>fifo</schedulingMode>
<weight>1.0</weight>
<minSharePreemptionTimeout>600</minSharePreemptionTimeout>
</queue>
<queue name="streaming_o15686870430731">
<minResources>0 mb, 0 vcores</minResources>
<maxResources>0 mb, 0 vcores</maxResources>
<maxRunningApps>50</maxRunningApps>
<schedulingMode>fifo</schedulingMode>
<weight>1.0</weight>
<minSharePreemptionTimeout>600</minSharePreemptionTimeout>
</queue>
<queue name="integration">
<minResources>32768 mb, 16 vcores</minResources>
<maxResources>32768 mb, 16 vcores</maxResources>
<maxRunningApps>50</maxRunningApps>
<schedulingMode>fifo</schedulingMode>
<weight>1.0</weight>
<minSharePreemptionTimeout>600</minSharePreemptionTimeout>
</queue>
<queue name="default">
<minResources>0 mb, 0 vcores</minResources>
<maxResources>8192 mb, 4 vcores</maxResources>
<maxRunningApps>10</maxRunningApps>
<schedulingMode>fifo</schedulingMode>
<weight>1.0</weight>
<minSharePreemptionTimeout>600</minSharePreemptionTimeout>
</queue>
</allocations>
other
-
dfs.datanode.max.transfer.threads: 数据传输最大线程,默认:16384
-
dfs.datanode.ipc.address:
DN的IPC监听端口,写0的话监听在随机端口通过心跳传输给NN,默认:8010 -
dfs.namenode.checkpoint.dir:
standby NN节点存储 checkpoint
文件目录,默认:/hadoop/hdfs/namesecondary -
dfs.namenode.stale.datanode.interval:
标记一个dn为“down机”时间间隔,即:如果
nn没有接到一个dn心跳超过这个时间间隔,dn将标记为“过期”,过期的间隔不能太小 (默认: 30000) -
dfs.namenode.safemode.threshold-pct
指定应有多少比例的数据块满足最小副本数要求,默认0.999f 说明:
(NN在启动的时候首先进入安全模式,如果DN丢失的block达到此参数设置的比例,则系统会一直处于安全模式状态即只读状态。如果设为1则HDFS永远是处于SafeMode)