Hadoop HA
1. Cluster planning
Hostname | IP | Installed software |
---|---|---|
master01 | 192.168.232.128 | jdk,hadoop |
master02 | 192.168.232.133 | jdk,hadoop |
slave01 | 192.168.232.135 | jdk,hadoop,zookeeper |
slave02 | 192.168.232.132 | jdk,hadoop,zookeeper |
slave03 | 192.168.232.134 | jdk,hadoop,zookeeper |
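The hostnames above must resolve on every node. If DNS is not available, a sketch of the matching /etc/hosts entries (added on all five machines) would look like this:
192.168.232.128 master01
192.168.232.133 master02
192.168.232.135 slave01
192.168.232.132 slave02
192.168.232.134 slave03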
2. Preparation
Install the JDK on every machine and configure passwordless SSH login between the nodes.
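A minimal sketch of the passwordless-login setup from master01, assuming a hadoop user already exists on every node (repeat the ssh-copy-id step for master02, slave02 and slave03; the two NameNodes also need to reach each other for fencing):
[hadoop@master01 ~]# ssh-keygen -t rsa              # generate a key pair, accept the defaults
[hadoop@master01 ~]# ssh-copy-id hadoop@slave01     # append the public key to slave01's authorized_keys
[hadoop@master01 ~]# ssh hadoop@slave01             # should now log in without a password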
3. Install ZooKeeper on slave01/slave02/slave03
See http://blog.csdn.net/qq_34056219/article/details/78745915
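For reference, the rest of this guide assumes a standard three-node ensemble listening on port 2181. A minimal zoo.cfg sketch (the /usr/local/zookeeper install location and dataDir are assumptions; each node also needs its own myid file under dataDir containing 1, 2 or 3):
tickTime=2000
initLimit=10
syncLimit=5
# assumed data directory; put each node's myid file here
dataDir=/usr/local/zookeeper/data
clientPort=2181
server.1=slave01:2888:3888
server.2=slave02:2888:3888
server.3=slave03:2888:3888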
4. Install Hadoop
4.1. Preparation
Download hadoop-2.7.0.tar.gz and move it to /usr/local/. Extract it:
[root@master01 local]# tar -zxvf hadoop-2.7.0.tar.gz
This produces the directory hadoop-2.7.0.
Rename hadoop-2.7.0 to hadoop:
[root@master01 local]# mv hadoop-2.7.0 hadoop
Change the owner and group of the hadoop directory to hadoop:
[root@master01 local]# chown -R hadoop:hadoop /usr/local/hadoop
4.2. Set environment variables
[root@master01 local]# vi /etc/profile
Append the following at the end:
export HADOOP_HOME=/usr/local/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
Reload the configuration:
[root@master01 local]# source /etc/profile
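To confirm that the new PATH is in effect, you can check the version, which should report 2.7.0:
[root@master01 local]# hadoop version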
4.3. Modify the configuration files
Configure hadoop-env.sh
[root@master01 local]# cd /usr/local/hadoop/etc/hadoop
[root@master01 hadoop]# vi hadoop-env.sh
In the file, find
export JAVA_HOME=${JAVA_HOME}    # change this to your JDK path, as below:
export JAVA_HOME=/usr/local/java/jdk1.8.0_144
Then apply the change:
[root@master01 hadoop]# source ./hadoop-env.sh
Configure core-site.xml
<configuration>
<!-- The default HDFS URI. When several HDFS clusters work side by side, the cluster name is specified here; the value must match the nameservice configured in hdfs-site.xml -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://cluster1</value>
</property>
<!-- The common base directory where the NameNode, DataNode, JournalNode, etc. store their data by default. You can also specify separate directories for each of these node types -->
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop/data/hadoop/tmp</value>
</property>
<!-- Addresses and ports of the ZooKeeper ensemble. Note: the number of nodes must be odd and no fewer than three -->
<property>
<name>ha.zookeeper.quorum</name>
<value>slave01:2181,slave02:2181,slave03:2181</value>
</property>
</configuration>
Configure hdfs-site.xml
<configuration>
<!-- Set the HDFS nameservice to cluster1; it must match the value in core-site.xml -->
<property>
<name>dfs.nameservices</name>
<value>cluster1</value>
</property>
<!-- cluster1 has two NameNodes, nn1 and nn2 -->
<property>
<name>dfs.ha.namenodes.cluster1</name>
<value>nn1,nn2</value>
</property>
<!-- RPC address of nn1 -->
<property>
<name>dfs.namenode.rpc-address.cluster1.nn1</name>
<value>master01:9000</value>
</property>
<!-- HTTP address of nn1 -->
<property>
<name>dfs.namenode.http-address.cluster1.nn1</name>
<value>master01:50070</value>
</property>
<!-- RPC address of nn2 -->
<property>
<name>dfs.namenode.rpc-address.cluster1.nn2</name>
<value>master02:9000</value>
</property>
<!-- HTTP address of nn2 -->
<property>
<name>dfs.namenode.http-address.cluster1.nn2</name>
<value>master02:50070</value>
</property>
<!-- Where the NameNode metadata (edit log) is stored on the JournalNodes -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://slave01:8485;slave02:8485;slave03:8485/cluster1</value>
</property>
<!-- Where the JournalNodes store their data on local disk -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/usr/local/hadoop/data/journaldata</value>
</property>
<!-- Enable automatic failover for the NameNode -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- Implementation used for failover (how clients locate the active NameNode) -->
<property>
<name>dfs.client.failover.proxy.provider.cluster1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Fencing methods; multiple methods are separated by newlines, i.e. one method per line -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>
sshfence
shell(/bin/true)
</value>
</property>
<!-- The sshfence method requires passwordless SSH login -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
</property>
<!-- Timeout for the sshfence method -->
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
</configuration>
Configure mapred-site.xml
First rename mapred-site.xml.template to mapred-site.xml, then edit mapred-site.xml as shown below.
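The rename, assuming the working directory is still /usr/local/hadoop/etc/hadoop:
[root@master01 hadoop]# mv mapred-site.xml.template mapred-site.xml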
<configuration>
<!-- Run MapReduce on YARN -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
Configure yarn-site.xml
<configuration>
<!-- Enable ResourceManager HA -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- Cluster id of the ResourceManager -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>yrc</value>
</property>
<!-- Logical ids of the ResourceManagers -->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!-- Hostname of ResourceManager rm1 -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>master01</value>
</property>
<!-- Hostname of ResourceManager rm2 -->
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>master02</value>
</property>
<!-- ZooKeeper ensemble addresses -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>slave01:2181,slave02:2181,slave03:2181</value>
</property>
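<!-- Auxiliary service the NodeManagers must run for the MapReduce shuffle -->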
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
Configure slaves
slave01
slave02
slave03
5. Copy
Finally, copy the whole Hadoop directory to master02 and slave01/02/03. Before copying, delete the doc directory under share (the documentation is not needed on the other nodes), which speeds up the transfer.
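For example, removing the docs first (the path follows the layout above):
[hadoop@master01 local]# rm -rf /usr/local/hadoop/share/doc
Then copy: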
[hadoop@master01 local]# scp -r hadoop hadoop@master02:/usr/local/
[hadoop@master01 local]# scp -r hadoop hadoop@slave01:/usr/local/
[hadoop@master01 local]# scp -r hadoop hadoop@slave02:/usr/local/
[hadoop@master01 local]# scp -r hadoop hadoop@slave03:/usr/local/
6. Startup
6.1. Start the ZooKeeper cluster (on slave01, slave02 and slave03)
[hadoop@slave01 ~]# cd /usr/local/zookeeper/bin
[hadoop@slave01 bin]# ./zkServer.sh start
# Check the status: there should be one leader and two followers
[hadoop@slave01 bin]# ./zkServer.sh status
6.2. Start the JournalNodes (run on slave01, slave02 and slave03 respectively)
[hadoop@slave01 ~]# cd /usr/local/hadoop/sbin
[hadoop@slave01 sbin]# ./hadoop-daemon.sh start journalnode
# Verify
[hadoop@slave01 sbin]# jps
# Should show JournalNode + QuorumPeerMain
6.3. Format the NameNode (on master01)
[hadoop@master01 ~]# hdfs namenode -format
Note: after formatting, the tmp directory must be copied to master02 (otherwise the NameNode on master02 will not start):
[hadoop@master01 ~]# cd /usr/local/hadoop/data/hadoop
[hadoop@master01 hadoop]# scp -r tmp hadoop@master02:/usr/local/hadoop/data/hadoop/
6.4. Format ZKFC (on master01)
[hadoop@master01 ~]# hdfs zkfc -formatZK
6.5. Start HDFS (on master01)
[hadoop@master01 ~]# cd /usr/local/hadoop/sbin
[hadoop@master01 sbin]# ./start-dfs.sh
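To verify, you can run jps on each node; with the configuration above, the expected processes at this point are roughly:
[hadoop@master01 sbin]# jps
# master01 / master02: NameNode, DFSZKFailoverController
# slave01/02/03: DataNode, JournalNode, QuorumPeerMain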
6.6. Start YARN (on master01)
[hadoop@master01 sbin]# ./start-yarn.sh
Note:
- The ResourceManager on master02 must be started manually (on master02):
yarn-daemon.sh start resourcemanager
- The NameNode and DataNode can also be started individually:
hadoop-daemon.sh start namenode
hadoop-daemon.sh start datanode
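To check that the HA setup is actually working, a few verification commands (the NameNode and ResourceManager ids follow the configuration above):
[hadoop@master01 ~]# hdfs haadmin -getServiceState nn1
[hadoop@master01 ~]# hdfs haadmin -getServiceState nn2
[hadoop@master01 ~]# yarn rmadmin -getServiceState rm1
[hadoop@master01 ~]# yarn rmadmin -getServiceState rm2
One NameNode and one ResourceManager should report active and the other standby. The NameNode web UIs are at http://master01:50070 and http://master02:50070; the active ResourceManager UI listens on port 8088 by default.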