阶段二hadoop启用HA
| 组件 | master | slave1 | slave2 |
| --- | --- | --- | --- |
| HDFS NameNode | NameNode | NameNode | |
| HDFS DataNode | DataNode | DataNode | DataNode |
| HDFS JournalNode | journalnode | journalnode | journalnode |
| YARN ResourceManager | Resourcemanager | JobHistory Server | Resourcemanager |
| YARN NodeManager | NodeManager | NodeManager | NodeManager |
| zookeeper | zk-server | zk-server | zk-server |
一、zookeeper安装
tar -zxvf apache-zookeeper-3.6.3-bin.tar.gz -C /opt/module/
cd /opt/module
mv apache-zookeeper-3.6.3-bin zookeeper
cd /opt/module/zookeeper/conf
mv zoo_sample.cfg zoo.cfg
vim zoo.cfg
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/opt/module/zookeeper/data
clientPort=2181
server.1=master:2888:3888
server.2=slave1:2888:3888
server.3=slave2:2888:3888
touch /opt/module/zookeeper/data/myid
向这个 myid 文件中写入 ID(ID 与前面 server.x 的 x 一致)
vi /opt/module/zookeeper/data/myid
chown -R hadoop:hadoop zookeeper
scp -r zookeeper slave1:/opt/module/
scp -r zookeeper slave2:/opt/module/
修改slave1、slave2的myid
注意:zookeeper3.5版本以后需要下载apache-zookeeper-3.5.5-bin.tar.gz ,下载apache-zookeeper-3.5.5.tar.gz是未编译的,启动不了。
二、配置hadoop集群的高可用
vim core-site.xml
<configuration>
<!--Configurations for NameNode(SecondaryNameNode)、DataNode、NodeManager:-->
<!-- 指定 NameNode 的地址 -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://myNameNodeServer</value>
<description>NameNode URI</description>
</property>
<!-- 指定 hadoop 数据的存储目录 -->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/module/hadoop-3.2.2/data</value>
</property>
<!-- 配置 HDFS 网页登录使用的静态用户为 hadoop -->
<property>
<name>hadoop.http.staticuser.user</name>
<value>hadoop</value>
</property>
<!-- 指定zookeeper地址 -->
<property>
<name>ha.zookeeper.quorum</name>
<value>master:2181,slave1:2181,slave2:2181</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
<description>Size of read/write buffer used in SequenceFiles,The default value is 131072</description>
</property>
</configuration>
vim hdfs-site.xml
<!-- 指定 NameNode 的地址 -->
<property>
<name>dfs.nameservices</name>
<value>myNameNodeServer</value>
<description>NameNode URI</description>
</property>
<!-- myNameNodeServer下面有两个NameNode,分别是nn1,nn2 -->
<property>
<name>dfs.ha.namenodes.myNameNodeServer</name>
<value>nn1,nn2</value>
</property>
<!-- nn1的RPC通信地址 -->
<property>
<name>dfs.namenode.rpc-address.myNameNodeServer.nn1</name>
<value>master:9000</value>
</property>
<!-- nn1的http通信地址 -->
<property>
<name>dfs.namenode.http-address.myNameNodeServer.nn1</name>
<value>master:9870</value>
</property>
<!-- nn2的RPC通信地址 -->
<property>
<name>dfs.namenode.rpc-address.myNameNodeServer.nn2</name>
<value>slave1:9000</value>
</property>
<!-- nn2的http通信地址 -->
<property>
<name>dfs.namenode.http-address.myNameNodeServer.nn2</name>
<value>slave1:9870</value>
</property>
<!-- 指定NameNode的元数据在JournalNode上的存放位置 -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://master:8485;slave1:8485;slave2:8485/myNameNodeServer</value>
</property>
<!-- 指定JournalNode在本地磁盘存放数据的位置 -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/opt/module/hadoop-3.2.2/journalData</value>
</property>
<!-- 开启NameNode失败自动切换 -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- 配置失败自动切换实现方式 -->
<property>
<name>dfs.client.failover.proxy.provider.myNameNodeServer</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- 配置隔离机制方法,Failover后防止停掉的Namenode启动,造成两个服务,多个机制用换行分割,即每个机制暂用一行-->
<property>
<name>dfs.ha.fencing.methods</name>
<value>
sshfence
shell(/bin/true)
</value>
</property>
<!-- 使用sshfence隔离机制时需要ssh免登陆,注意换成自己的用户名 -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
</property>
<!-- 配置sshfence隔离机制超时时间 -->
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
vim yarn-site.xml
<configuration>
<!--Configurations for ResourceManager and NodeManager:-->
<property>
<name>yarn.acl.enable</name>
<value>false</value>
<description>Enable ACLs? Defaults to false. The value of the optional is "true" or "false"</description>
</property>
<property>
<name>yarn.admin.acl</name>
<value>*</value>
<description>ACL to set admins on the cluster. ACLs are of the form comma-separated-users space comma-separated-groups. Defaults to special value of * which means anyone. Special value of just a space means no one has access.</description>
</property>
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.ha.automatic-failover.embedded</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>master:2181,slave1:2181,slave2:2181</value>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
<name>yarn.client.failover-sleep-base-ms</name>
<value>100</value>
</property>
<property>
<name>yarn.client.failover-sleep-max-ms</name>
<value>2000</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>yarnRM</value>
</property>
<property>
<name>yarn.resourcemanager.address.rm210</name>
<value>master:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm210</name>
<value>master:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm210</name>
<value>master:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm210</name>
<value>master:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm210</name>
<value>master:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.https.address.rm210</name>
<value>master:8090</value>
</property>
<property>
<name>yarn.resourcemanager.address.rm209</name>
<value>slave2:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm209</name>
<value>slave2:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm209</name>
<value>slave2:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm209</name>
<value>slave2:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm209</name>
<value>slave2:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.https.address.rm209</name>
<value>slave2:8090</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm210,rm209</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
<description>Configuration to enable or disable log aggregation</description>
</property>
<!--Configurations for ResourceManager:-->
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
<description>ResourceManager Scheduler class CapacityScheduler (recommended), FairScheduler (also recommended), or FifoScheduler.The default value is "org.apache.hadoop.yarn.server.res
ourcemanager.scheduler.capacity.CapacityScheduler".
</description>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>512</value>
<description>Minimum limit of memory to allocate to each container request at the Resource Manager.NOTES:In MBs</description>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>1024</value>
<description>Maximum limit of memory to allocate to each container request at the Resource Manager.NOTES:In MBs</description>
</property>
<!--Configurations for History Server:-->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>-1</value>
<description>How long to keep aggregation logs before deleting them. -1 disables. Be careful, set this too small and you will spam the name node.</description>
</property>
<property>
<name>yarn.log-aggregation.retain-check-interval-seconds</name>
<value>-1</value>
<description>Time between checks for aggregated log retention. If set to 0 or a negative value then the value is computed as one-tenth of the aggregated log retention time. Be careful,
set this too small and you will spam the name node.</description>
</property>
<!--Configurations for NodeManager:-->
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>1024</value>
<description>Resource i.e. available physical memory, in MB, for given NodeManager.
The default value is 8192.
NOTES:Defines total available resources on the NodeManager to be made available to running containers
</description>
</property>
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>2.1</value>
<description>Maximum ratio by which virtual memory usage of tasks may exceed physical memory.
The default value is 2.1
NOTES:The virtual memory usage of each task may exceed its physical memory limit by this ratio. The total amount of virtual memory used by tasks on the NodeManager may exceed its p
hysical memory usage by this ratio.
</description>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>/opt/module/hadoop-3.2.2/nm-local-dir</value>
<description>Comma-separated list of paths on the local filesystem where intermediate data is written.
The default value is "${hadoop.tmp.dir}/nm-local-dir"
NOTES:Multiple paths help spread disk i/o.
</description>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>/opt/module/hadoop-3.2.2/userlogs</value>
<description>Comma-separated list of paths on the local filesystem where logs are written
The default value is "${yarn.log.dir}/userlogs"
NOTES:Multiple paths help spread disk i/o.
</description>
</property>
<property>
<name>yarn.nodemanager.log.retain-seconds</name>
<value>10800</value>
<description>Default time (in seconds) to retain log files on the NodeManager Only applicable if log-aggregation is disabled.
The default value is "10800"
</description>
</property>
<property>
<name>yarn.application.classpath</name>
<value>/opt/module/hadoop-3.2.2/etc/hadoop:/opt/module/hadoop-3.2.2/share/hadoop/common/lib/*:/opt/module/hadoop-3.2.2/share/hadoop/common/*:/opt/module/hadoop-3.2.2/share/hadoop/hdfs:/opt/module/hadoop-3.2.2/share/hadoop/hdfs/lib/*:/opt/module/hadoop-3.2.2/share/hadoop/hdfs/*:/opt/module/hadoop-3.2.2/share/hadoop/mapreduce/lib/*:/opt/module/hadoop-3.2.2/share/hadoop/mapreduce/*:/opt/module/hadoop-3.2.2/share/hadoop/yarn:/opt/module/hadoop-3.2.2/share/hadoop/yarn/lib/*:/opt/module/hadoop-3.2.2/share/hadoop/yarn/*</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/logs</value>
<description>HDFS directory where the application logs are moved on application completion. Need to set appropriate permissions. Only applicable if log-aggregation is enabled.
The default value is "/logs" or "/tmp/logs"
</description>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir-suffix</name>
<value>logs</value>
<description>Suffix appended to the remote log dir. Logs will be aggregated to ${yarn.nodemanager.remote-app-log-dir}/${user}/${thisParam} Only applicable if log-aggregation is enabled
.</description>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
<description>Shuffle service that needs to be set for Map Reduce applications.</description>
</property>
</configuration>
三、启动
1、启动zookeeper集群
每台服务器执行:zkServer.sh start
2、启动journalnode
每台服务器执行:hdfs --daemon start journalnode(Hadoop 3.x 推荐写法;旧命令 hadoop-daemon.sh start journalnode 已被标记为弃用,但仍可使用)
3、格式化hdfs
第一次启动需要格式化,后面启动不再需要。格式化会根据 core-site.xml 中的 hadoop.tmp.dir 配置生成一个目录,如果之前有格式化过,那么需要先删除所有节点上的该目录。本文中配置的是 /opt/module/hadoop-3.2.2/data,因此在三个节点上执行:
rm -rf /opt/module/hadoop-3.2.2/data
一台namenode上执行hdfs的格式化:
hdfs namenode -format
出现:2021-08-05 09:50:12,290 INFO common.Storage: Storage directory /opt/module/hadoop-3.2.2/data/dfs/name has been successfully formatted.表示成功。
将/opt/module/hadoop-3.2.2/data同步到另一个namenode节点。
4、格式化zookeeper
hdfs zkfc -formatZK
5、启动hdfs、yarn
Namenode节点执行:start-dfs.sh
Resourcemanager节点执行:start-yarn.sh
jobhistory 节点执行:mr-jobhistory-daemon.sh start historyserver