高可用配置
修改配置文件(路径:/hadoop/etc/hadoop/下,下面四段配置依次对应 core-site.xml、hdfs-site.xml、mapred-site.xml、yarn-site.xml)
<configuration>
    <!-- Name the HDFS nameservice "ns"; hdfs-site.xml must use the same name -->
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://ns</value>
    </property>
    <!-- Base directory for Hadoop runtime data; must be an absolute path
         (the original relative value "opt/hadoop/tmp" would resolve against
         the process working directory) -->
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/opt/hadoop/tmp</value>
    </property>
    <!-- ZooKeeper quorum used for HA coordination (ZKFC) -->
    <property>
        <name>ha.zookeeper.quorum</name>
        <value>hadoop001:2181,hadoop002:2181,hadoop003:2181</value>
    </property>
    <!-- Hosts from which the "root" proxy user may impersonate others -->
    <property>
        <name>hadoop.proxyuser.root.hosts</name>
        <value>*</value>
    </property>
    <!-- Groups whose members the "root" proxy user may impersonate -->
    <property>
        <name>hadoop.proxyuser.root.groups</name>
        <value>*</value>
    </property>
</configuration>
<configuration>
    <!-- Nameservice id; must match fs.defaultFS in core-site.xml -->
    <property>
        <name>dfs.nameservices</name>
        <value>ns</value>
    </property>
    <!-- The two NameNodes that make up nameservice "ns" -->
    <property>
        <name>dfs.ha.namenodes.ns</name>
        <value>hadoop001,hadoop002</value>
    </property>
    <!-- HTTP address of NameNode hadoop001 (port 50070) -->
    <property>
        <name>dfs.namenode.http-address.ns.hadoop001</name>
        <value>hadoop001:50070</value>
    </property>
    <!-- HTTP address of NameNode hadoop002 (port 50070) -->
    <property>
        <name>dfs.namenode.http-address.ns.hadoop002</name>
        <value>hadoop002:50070</value>
    </property>
    <!-- RPC address of NameNode hadoop001 (port 9000) -->
    <property>
        <name>dfs.namenode.rpc-address.ns.hadoop001</name>
        <value>hadoop001:9000</value>
    </property>
    <!-- RPC address of NameNode hadoop002 (port 9000) -->
    <property>
        <name>dfs.namenode.rpc-address.ns.hadoop002</name>
        <value>hadoop002:9000</value>
    </property>
    <!-- Shared edits location on the JournalNode quorum (port 8485);
         journal id kept as "ns" to match the nameservice, as the comment
         above requires (original said "ns1") -->
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://hadoop001:8485;hadoop002:8485;hadoop003:8485/ns</value>
    </property>
    <!-- Local directory where each JournalNode stores its edits -->
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/opt/journalnode</value>
    </property>
    <!-- Enable automatic NameNode failover -->
    <property>
        <name>dfs.ha.automatic-failover.enabled</name>
        <value>true</value>
    </property>
    <!-- Client-side proxy provider used to locate the active NameNode -->
    <property>
        <name>dfs.client.failover.proxy.provider.ns</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>
    <!-- Fencing methods, one per line; shell(/bin/true) is the fallback
         when sshfence cannot reach the failed node -->
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>
            sshfence
            shell(/bin/true)
        </value>
    </property>
    <!-- SSH connect timeout for sshfence, in milliseconds -->
    <property>
        <name>dfs.ha.fencing.ssh.connect-timeout</name>
        <value>30000</value>
    </property>
    <!-- Block replication factor -->
    <property>
        <name>dfs.replication</name>
        <value>3</value>
    </property>
    <!-- Enable WebHDFS (REST API) on NameNodes and DataNodes; optional -->
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>
    <!-- NameNode metadata storage path -->
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>/opt/hadoop/tmp/data/namenode</value>
    </property>
    <!-- DataNode block storage path -->
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>/opt/hadoop/tmp/data/datanode</value>
    </property>
</configuration>
<configuration>
    <!-- Run MapReduce on YARN (original comment had the typo "year") -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <!-- JobHistory server RPC address; host fixed from the garbled
         "hadoop001.101" to match the webapp address below -->
    <property>
        <name>mapreduce.jobhistory.address</name>
        <value>hadoop001:10020</value>
    </property>
    <!-- JobHistory server web UI address -->
    <property>
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>hadoop001:19888</value>
    </property>
</configuration>
<configuration>
<!-- Enable ResourceManager high availability -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- Cluster id shared by the RM pair -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>cluster-id</value>
</property>
<!-- Logical ids of the active/standby ResourceManagers -->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!-- Map each logical RM id to its host -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>hadoop001</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>hadoop002</value>
</property>
<!-- Web UI addresses for each ResourceManager -->
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>hadoop001:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>hadoop002:8088</value>
</property>
<!-- ZooKeeper ensemble used for RM state/leader election -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>hadoop001:2181,hadoop002:2181,hadoop003:2181</value>
</property>
<!-- Auxiliary service that lets reducers fetch map output (shuffle) -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<!-- Enable log aggregation -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- Retain aggregated logs for 7 days (604800 seconds) -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
</configuration>
- 修改slaves文件:加入所有节点名
- 将配置好的文件传至其他节点(省事):
scp etc/hadoop/*.xml root@主机名:$PWD/etc/hadoop
- 启动所有节点的journalnode服务:
hadoop-daemon.sh start journalnode
- 格式化(仅主节点):
hadoop namenode -format
- 将主节点hadoop下的tmp目录传至其他节点:
scp -r tmp root@主机名:$PWD
- 启动所有节点的zookeeper:
zkServer.sh start
//启动后可以查看一下zk状态:zkServer.sh status
- 启动hadoop(仅主节点):
start-all.sh
- 启动备用节点上的resourcemanager:
yarn-daemon.sh start resourcemanager
- 主节点格式化zkfc(首次启动):
hdfs zkfc -formatZK
- 主备启动zkfc(首次启动):
hadoop-daemon.sh start zkfc
- 启动jobhistory:
mr-jobhistory-daemon.sh start historyserver