HA 高可用集群搭建
1.准备1台虚拟机(配置完基础环境后,克隆2台,将集群搭建完2台主节点,1台子节点)
1.1配置网络
[root@Master1 ~]# vi /etc/sysconfig/network-scripts/ifcfg-eth0
DEVICE=eth0
TYPE=Ethernet
ONBOOT=yes
NM_CONTROLLED=yes
BOOTPROTO=static
IPADDR=192.168.138.121
NETMASK=255.255.255.0
GATEWAY=192.168.138.2
DNS1=114.114.114.114
DNS2=192.168.138.2
[root@Master1 ~]# service network restart
1.2 安装JDK,配置环境
1.2.1 下载 Linux版 jdk1.8 安装包,拖入虚拟机中解压到指定目录
[root@Master1 ~]# tar -zxvf /usr/jdk-8u102-linux-x64.tar.gz -C /usr/local/
1.2.2 配置jdk环境变量
[root@Master1 ~]# vi /etc/profile
export JAVA_HOME=/usr/local/jdk1.8.0_102
export PATH=$PATH:$JAVA_HOME/bin
[root@Master1 ~]# source /etc/profile
1.3 修改主机名和映射关系
[root@Master1 ~]# vi /etc/sysconfig/network
NETWORKING=yes
HOSTNAME=Master1
[root@Master1 ~]# vi /etc/hosts
192.168.138.121 Master1
192.168.138.122 Master2
192.168.138.123 slave
1.4 安装ssh客户端
[root@Master1 ~]# yum install -y openssh-clients.x86_64
1.5 永久关闭防火墙
[root@Master1 ~]# service iptables stop
[root@Master1 ~]# chkconfig iptables off
2. 克隆出两台虚拟机(Master2、slave)
2.1 将两台虚拟机的 eth0 网卡删除 将 eth1 改为 eth0
[root@Master1 ~]# vi /etc/udev/rules.d/70-persistent-net.rules
2.2 分别修改 ip 主机名
按 映射 分别修改
192.168.138.121 Master1
192.168.138.122 Master2
192.168.138.123 slave
注:配置完成后一定要重启虚拟机
[root@Master1 ~]# reboot
3 进行时间同步
3.1分别在主机上安装 ntp,并进行时间同步
[root@Master1 ~]# yum install -y ntp
[root@Master1 ~]# ntpdate cn.pool.ntp.org
4 进行免密登陆设置
4.1分别在3台主机上生成自己的密钥对
[root@Master1 ~]# ssh-keygen -t rsa
4.2分别向自己和其他两台主机发送自己所生成的公钥
其中一台:
[root@Master1 ~]# ssh-copy-id Master1
[root@Master1 ~]# ssh-copy-id Master2
[root@Master1 ~]# ssh-copy-id slave
5 安装zookeeper
5.1 在 Master1 上解压zookeeper安装包到指定目录
[root@Master1 ~]# tar -zxvf /usr/zookeeper-3.4.6.tar.gz -C /usr/local
5.2 进入 zookeeper 的 conf 文件夹下 修改配置文件 zoo_sample.cfg 先将其改名为 zoo.cfg
[root@Master1 ~]# mv zoo_sample.cfg zoo.cfg
tickTime=2000
initLimit=10
syncLimit=5
dataDir=/root/zk/
clientPort=2181
server.1=Master1:2888:3888
server.2=Master2:2888:3888
server.3=slave:2888:3888
注:修改dataDir 的目录 创建该目录
5.3 将zookeeper 安装文件 发送给其余两台机器
[root@Master1 local]# scp -r zookeeper-3.4.6/ Master2:/usr/local
[root@Master1 local]# scp -r zookeeper-3.4.6/ slave:/usr/local
5.4 分别在各主机上创建配置文件中的目录
[root@Master1 local]# cd /root
[root@Master1 local]# mkdir zk
5.5 分别在各台主机的 zk 文件夹下 创建 文件 myid 在Master1 写入 1,在Master2 写入2,在slave写入3
[root@Master1 zk]# vi myid
1
[root@Master2 zk]# vi myid
2
[root@slave zk]# vi myid
3
注:重新登录终端,环境变量生效
5.6 启动zookeeper
分别在 Master1,Master2,slave 上启动 zookeeper
[root@Master1 zookeeper-3.4.6]# bin/zkServer.sh start
[root@Master2 zookeeper-3.4.6]# bin/zkServer.sh start
[root@slave zookeeper-3.4.6]# bin/zkServer.sh start
6 安装Hadoop
6.1 在Master1上解压hadoop压缩包
[root@Master1 usr]# tar -zxvf hadoop-2.7.3.tar.gz -C /usr/local
6.2 修改 hadoop-env.sh 配置文件 配置Java环境
[root@Master1 usr]# cd /usr/local/hadoop-2.7.3/etc/hadoop
[root@Master1 hadoop]# vi hadoop-env.sh
# The java implementation to use.
export JAVA_HOME=/usr/local/jdk1.8.0_102
6.3 修改 core-site.xml 配置文件
<property>
<name>fs.defaultFS</name>
<value>hdfs://beh</value>
<final>false</final>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/root/hadoopdata</value>
<final>false</final>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>Master1:2181,Master2:2181,slave:2181</value>
<final>false</final>
</property>
注:重点将三台主机的主机名配置到其中 Master1:2181,Master2:2181,slave:2181
6.4 修改 hdfs-site.xml 配置文件
<property>
<name>dfs.nameservices</name>
<value>beh</value>
<final>false</final>
</property>
<property>
<name>dfs.ha.namenodes.beh</name>
<value>nn1,nn2</value>
<final>false</final>
</property>
<property>
<name>dfs.namenode.rpc-address.beh.nn1</name>
<value>Master1:9000</value>
<final>false</final>
</property>
<property>
<name>dfs.namenode.http-address.beh.nn1</name>
<value>Master1:50070</value>
<final>false</final>
</property>
<property>
<name>dfs.namenode.rpc-address.beh.nn2</name>
<value>Master2:9000</value>
<final>false</final>
</property>
<property>
<name>dfs.namenode.http-address.beh.nn2</name>
<value>Master2:50070</value>
<final>false</final>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://Master1:8485;Master2:8485;slave:8485/beh</value>
<final>false</final>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled.beh</name>
<value>true</value>
<final>false</final>
</property>
<property>
<name>dfs.client.failover.proxy.provider.beh</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
<final>false</final>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/root/metadata/journal/</value>
<final>false</final>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
<final>false</final>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
<final>true</final>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
<final>false</final>
</property>
6.5 修改 mapred-site.xml 配置文件
先将 配置文件改名
[root@Master1 hadoop]# mv mapred-site.xml.template mapred-site.xml
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>slave:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>slave:19888</value>
</property>
<property>
<name>yarn.app.mapreduce.am.staging-dir</name>
<value>/root/metadata/hadoop-yarn/staging</value>
</property>
6.6 修改 yarn-site.xml 配置文件
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce.shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>/root/metadata/yarn</value>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>/root/logs/yarn/userlogs</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<description>Where to aggregate logs</description>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>hdfs://beh/var/log/hadoop-yarn/apps</value>
</property>
<property>
<name>yarn.resourcemanager.connect.retry-interval.ms</name>
<value>2000</value>
</property>
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>beh</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<name>yarn.resourcemanager.ha.id</name>
<value>rm1</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.fair.FairScheduler</value>
</property>
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
<name>yarn.resourcemanager.zk.state-store.address</name>
<value>Master1:2181,Master2:2181,slave:2181</value>
</property>
<property>
<name>yarn.app.mapreduce.am.scheduler.connection.wait.interval-ms</name>
<value>5000</value>
</property>
<property>
<name>yarn.resourcemanager.address.rm1</name>
<value>Master1:23140</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm1</name>
<value>Master1:23130</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.https.address.rm1</name>
<value>Master1:23189</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>Master1:23188</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm1</name>
<value>Master1:23125</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm1</name>
<value>Master1:23141</value>
</property>
<property>
<name>yarn.resourcemanager.address.rm2</name>
<value>Master2:23140</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm2</name>
<value>Master2:23130</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.https.address.rm2</name>
<value>Master2:23189</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>Master2:23188</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm2</name>
<value>Master2:23125</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm2</name>
<value>Master2:23141</value>
</property>
<property>
<name>mapreduce.shuffle.port</name>
<value>23080</value>
</property>
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>Master1:2181,Master2:2181,slave:2181</value>
</property>
6.7 修改 slaves 配置文件
slave
注:将子节点主机名配置进去
6.8 将hadoop安装文件 发送给 其余两台主机
[root@Master1 local]# scp -r hadoop-2.7.3/ Master2:/usr/local
[root@Master1 local]# scp -r hadoop-2.7.3/ slave:/usr/local
修改 Master2 的 yarn-site.xml 配置文件 ,将 rm1改为 rm2
<property>
<name>yarn.resourcemanager.ha.id</name>
<value>rm2</value>
</property>
7 启动集群
7.1 三台主机分别 启动journalnode(进程名:JournalNode)
[root@Master1 hadoop-2.7.3]# sbin/hadoop-daemon.sh start journalnode
[root@Master2 hadoop-2.7.3]# sbin/hadoop-daemon.sh start journalnode
[root@slave hadoop-2.7.3]# sbin/hadoop-daemon.sh start journalnode
7.2 格式化zookeeper,在Master1上执行
[root@Master1 bin]# ./hdfs zkfc -formatZK
7.3 对Master1节点进行格式化和启动启动namenode(进程名:NameNode)
[root@Master1 bin]# ./hdfs namenode -format
[root@Master1 hadoop-2.7.3]# sbin/hadoop-daemon.sh start namenode
7.4 对Master2节点进行格式化和启动
[root@Master2 bin]# ./hdfs namenode -bootstrapStandby
[root@Master2 hadoop-2.7.3]# sbin/hadoop-daemon.sh start namenode
7.5 在Master1和Master2上启动zkfc服务(zkfc服务进程名:DFSZKFailoverController):此时Master1和Master2就会有一个节点变为active状态
[root@Master1 hadoop-2.7.3]# sbin/hadoop-daemon.sh start zkfc
[root@Master2 hadoop-2.7.3]# sbin/hadoop-daemon.sh start zkfc
7.6 启动datanode(进程名:DataNode):在slave上执行
[root@slave hadoop-2.7.3]# sbin/hadoop-daemon.sh start datanode
7.7 在Master1上启动(此脚本将会启动hadoop1上的resourcemanager及所有的nodemanager)
[root@Master1 hadoop-2.7.3]# sbin/start-yarn.sh
7.8 在Master2上启动resourcemanager
[root@Master2 hadoop-2.7.3]# sbin/yarn-daemon.sh start resourcemanager
8 验证是否成功
打开浏览器,访问 192.168.138.121:50070 以及 192.168.138.122:50070,你将会看到两个namenode一个是active而另一个是standby。
然后kill掉其中active的namenode进程,另一个standby的namenode将会自动转换为active状态
打开浏览器,访问192.168.138.121:23188或者 192.168.138.122:23188,只有active的会打开如下界面,standby的那个不会看到页面
然后kill掉active的resourcemanager另一个将会变为active的,说明resourcemanager HA是成功的