I. Environment Preparation:
1. Install Java
vim /etc/profile
export JAVA_HOME=/usr/java/jdk1.8.0_251-amd64
export PATH=$JAVA_HOME/bin:$PATH
source /etc/profile
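A quick check that the shell now picks up the intended JDK:
# both should point at the JDK configured above
java -version
echo $JAVA_HOME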
2. Set the hostname on each node, to make the nodes easy to tell apart and to simplify later configuration
# Node 1 (192.168.165.130)
hostnamectl set-hostname db1
# Node 2 (192.168.165.131)
hostnamectl set-hostname db2
# Node 3 (192.168.165.132)
hostnamectl set-hostname db3
3. Edit the hosts file and append the host mappings at the end (on all three nodes)
vim /etc/hosts
192.168.165.130 db1
192.168.165.131 db2
192.168.165.132 db3
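To confirm the mappings resolve on every node, each name should answer from its own IP:
ping -c 1 db1
ping -c 1 db2
ping -c 1 db3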
4. Configure passwordless SSH login
#4-1 Generate a key pair; just press Enter at every prompt
ssh-keygen
#4-2 Copy the public key to the other hosts
ssh-copy-id -i 192.168.165.131
ssh-copy-id -i 192.168.165.132
#4-3 If ssh-copy-id -i <node-IP> reports the following
/usr/bin/ssh-copy-id: INFO: Source of key(s) to be installed: "/root/.ssh/id_rsa.pub"
/usr/bin/ssh-copy-id: INFO: attempting to log in with the new key(s), to filter out any that are already installed
/usr/bin/ssh-copy-id: WARNING: All keys were skipped because they already exist on the remote system.
(if you think this is a mistake, you may want to use -f option)
# This means the key already exists on the remote host; replace -i with -f to force an overwrite
ssh-copy-id -f <node-IP>
# If ssh <hostname> fails with the following error
ssh: Could not resolve hostname db: Name or service not known
check whether /etc/hosts is configured correctly
# Run the following on all three nodes (authorized_keys does not yet contain the local key, so add it manually)
cat /root/.ssh/id_rsa.pub >> /root/.ssh/authorized_keys
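Passwordless login can then be verified from any node; each command should print the remote hostname without prompting for a password:
ssh db1 hostname
ssh db2 hostname
ssh db3 hostname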
5. Download the packages and extract them
#5-1 You can download from the web pages directly or use wget
ZooKeeper 3.5.8 download:
https://archive.apache.org/dist/zookeeper/zookeeper-3.5.8/apache-zookeeper-3.5.8-bin.tar.gz?spm=5176.28103460.0.0.8d99572c7cDrh0&file=apache-zookeeper-3.5.8-bin.tar.gz
Hadoop 3.2.1 download:
https://archive.apache.org/dist/hadoop/common/hadoop-3.2.1/hadoop-3.2.1.tar.gz?spm=5176.28103460.0.0.8d99572c7cDrh0&file=hadoop-3.2.1.tar.gz
HBase 2.2.5 download:
https://archive.apache.org/dist/hbase/2.2.5/
6. Make sure the clocks on all nodes are in sync (check with date)
date
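If the clocks drift apart, one option (assuming the chrony package is available, as on CentOS/RHEL 7+) is:
# install and enable chrony, then check the sync status
yum install -y chrony
systemctl enable --now chronyd
chronyc tracking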
7. Disable SELinux and the firewall
#7-1 Disable temporarily
setenforce 0
#7-2 Disable permanently
vim /etc/sysconfig/selinux
SELINUX=disabled
#7-3 Check whether SELinux is off; after setenforce 0 it shows Permissive
getenforce
Permissive
#7-4 Stop the firewall
systemctl stop firewalld
# Disable the firewall permanently
systemctl disable firewalld
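To confirm the firewall is stopped now and stays off after a reboot:
systemctl is-active firewalld
systemctl is-enabled firewalld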
II. Install and Configure ZooKeeper
1. Install ZooKeeper
mkdir -p /home/hdfs
tar zxvf apache-zookeeper-3.5.8-bin.tar.gz
mv apache-zookeeper-3.5.8-bin /home/hdfs/zookeeper
2. Configure ZooKeeper: go to the conf directory under the ZooKeeper home and copy zoo_sample.cfg to zoo.cfg
cd /home/hdfs/zookeeper/conf
cp zoo_sample.cfg zoo.cfg
mkdir -p /home/hdfs/zookeeper/{data,log}
3. Edit zoo.cfg
vim /home/hdfs/zookeeper/conf/zoo.cfg
#3-1 Add:
dataLogDir=/home/hdfs/zookeeper/log
dataDir=/home/hdfs/zookeeper/data
#3-2 Append at the end of the file to specify the ZooKeeper cluster hosts and ports
server.1=db1:2888:3888
server.2=db2:2888:3888
server.3=db3:2888:3888
#3-3 Create the myid file under /home/hdfs/zookeeper/data
echo "1" > /home/hdfs/zookeeper/data/myid
#3-4 Copy the zookeeper directory to the other two nodes
scp -r /home/hdfs/zookeeper db2:/home/hdfs/
scp -r /home/hdfs/zookeeper db3:/home/hdfs/
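Note: the copied data/myid on db2 and db3 still contains 1. Each node's myid must match its server.N entry in zoo.cfg, so set it before starting:
# on db2
echo "2" > /home/hdfs/zookeeper/data/myid
# on db3
echo "3" > /home/hdfs/zookeeper/data/myid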
#3-5 Set the ZooKeeper environment variables globally (on all three nodes)
vim /etc/profile
export ZOOKEEPER_HOME=/home/hdfs/zookeeper
export PATH=$ZOOKEEPER_HOME/bin:$PATH
source /etc/profile
#3-6 Start the ZK cluster by running the following on every node (if the command is not found, refresh the environment with source /etc/profile)
zkServer.sh start
#3-7 Check the ZK role on each of the three nodes: zkServer.sh status
[root@db ~]# zkServer.sh status
/usr/bin/java
ZooKeeper JMX enabled by default
Using config: /home/hdfs/zookeeper/bin/../conf/zoo.cfg
Client port found: 2181. Client address: localhost.
Mode: follower
III. Hadoop HA Cluster Installation
1. Extract and install Hadoop
tar -zxvf hadoop-3.2.1.tar.gz
mv hadoop-3.2.1 /home/hdfs/hadoop
2. Configure the Hadoop environment variables
#2-1 Add the variable (use the path of your own JDK installation):
vim /home/hdfs/hadoop/etc/hadoop/hadoop-env.sh
export JAVA_HOME=/usr/java/jdk1.8.0_251-amd64
source /home/hdfs/hadoop/etc/hadoop/hadoop-env.sh
#2-2 Check that it works
[root@db hadoop]# hadoop version
Hadoop 3.2.1
Source code repository https://gitbox.apache.org/repos/asf/hadoop.git -r b3cbbb467e22ea829b3808f4b7b01d07e0bf3842
Compiled by rohithsharmaks on 2019-09-10T15:56Z
Compiled with protoc 2.5.0
From source with checksum 776eaf9eee9c0ffc370bcbc1888737
This command was run using /home/hdfs/hadoop/share/hadoop/common/hadoop-common-3.2.1.jar
3. Configure the Hadoop PID file directory. HADOOP_PID_DIR defaults to /tmp, which the operating system may clear on reboot:
vim /home/hdfs/hadoop/etc/hadoop/hadoop-env.sh
export HADOOP_PID_DIR=/home/hdfs/hadoop/pids
#3-1 Create the directory manually:
mkdir -p /home/hdfs/hadoop/pids
4. Edit core-site.xml
vim /home/hdfs/hadoop/etc/hadoop/core-site.xml
Add the following:
<configuration>
<!-- Default filesystem URI. In single-NameNode mode this would be the NameNode host; in HA mode (as here) it must be set to the nameservice name -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>
<!-- Buffer size for sequence files. It should be a multiple of the hardware page size (4096 on Intel x86) and determines how much data is buffered during read/write operations. Default is 4096 -->
<property>
<name>io.file.buffer.size</name>
<value>40960</value>
</property>
<!-- Hadoop temporary directory -->
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hdfs/hadoop/tmp/${user.name}</value>
</property>
<!-- ZooKeeper quorum address -->
<property>
<name>ha.zookeeper.quorum</name>
<value>db1:2181,db2:2181,db3:2181</value>
</property>
<!-- Workaround for the active NameNode logging IPC's epoch [X] is less than the last promised epoch [X+1], which can cause a brief dual-active state -->
<property>
<name>ha.health-monitor.rpc-timeout.ms</name>
<value>180000</value>
</property>
</configuration>
5. Edit hdfs-site.xml
#5-1 Create the storage directories
mkdir -p /home/hdfs/hadoop/data/mycluster
mkdir -p /home/hdfs/hadoop/data/tmp/journal
mkdir -p /home/hdfs/hadoop/data/dn
vim /home/hdfs/hadoop/etc/hadoop/hdfs-site.xml
#5-2 Add the following
<configuration>
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<!-- mycluster has two NameNodes; their logical names are set to db1 and db2 here (nn1/nn2 would also work). Later settings must reference these logical names consistently -->
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>db1,db2</value>
</property>
<!-- RPC address of db1 -->
<property>
<name>dfs.namenode.rpc-address.mycluster.db1</name>
<value>db1:9000</value>
</property>
<!-- HTTP address of db1 -->
<property>
<name>dfs.namenode.http-address.mycluster.db1</name>
<value>db1:50070</value>
</property>
<!-- Service RPC address of db1 -->
<property>
<name>dfs.namenode.servicerpc-address.mycluster.db1</name>
<value>db1:53310</value>
</property>
<!-- RPC address of db2 -->
<property>
<name>dfs.namenode.rpc-address.mycluster.db2</name>
<value>db2:9000</value>
</property>
<!-- HTTP address of db2 -->
<property>
<name>dfs.namenode.http-address.mycluster.db2</name>
<value>db2:50070</value>
</property>
<!-- Service RPC address of db2 -->
<property>
<name>dfs.namenode.servicerpc-address.mycluster.db2</name>
<value>db2:53310</value>
</property>
<!-- Local directory where the NameNode stores its metadata (fsimage/edits) -->
<property>
<name>dfs.namenode.name.dir</name>
<value>/home/hdfs/hadoop/data/mycluster</value>
<final>true</final>
</property>
<!-- Shared edits location of the NameNode metadata on the JournalNodes; these must be the nodes where JournalNode is started (e.g. /home/hdfs/hadoop/sbin/hadoop-daemons.sh start journalnode) -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://db1:8485;db2:8485;db3:8485/mycluster</value>
</property>
<!-- Local directory where the JournalNodes store their data -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/home/hdfs/hadoop/data/tmp/journal</value>
</property>
<!-- Enable automatic NameNode failover -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- Failover proxy provider implementation -->
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Fencing methods; multiple methods are separated by newlines, one method per line -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>
sshfence
shell(/bin/true)
</value>
</property>
<!-- The sshfence method requires passwordless SSH -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<!-- Timeout for the sshfence method -->
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
<!-- DataNode data directories. Mounting multiple disks per machine is recommended: it increases capacity and reduces both the impact of a single-disk failure and the I/O load per disk -->
<property>
<name>dfs.datanode.data.dir</name>
<value>/home/hdfs/hadoop/data/dn</value>
<final>true</final>
</property>
</configuration>
6. Edit mapred-site.xml
vim /home/hdfs/hadoop/etc/hadoop/mapred-site.xml
Add the following
<configuration>
<!-- Run MapReduce on YARN -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- Expert: when set to true, the task tracker sends an out-of-band heartbeat on task completion for better latency -->
<property>
<name>mapreduce.tasktracker.outofband.heartbeat</name>
<value>true</value>
</property>
</configuration>
7. Edit yarn-site.xml
vim /home/hdfs/hadoop/etc/hadoop/yarn-site.xml
# Add the following
<configuration>
<!-- Site specific YARN configuration properties -->
<!-- Enable YARN ResourceManager HA -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- Cluster id of the RM HA pair -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>RM_HA_ID</value>
</property>
<!-- Logical ids of the RMs -->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!-- db1 is rm1 -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>db1</value>
</property>
<!-- db2 is rm2 -->
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>db2</value>
</property>
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<!-- ZooKeeper cluster address -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>db1:2181,db2:2181,db3:2181</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.application.classpath</name>
<value>
/home/hdfs/hadoop/etc/hadoop,
/home/hdfs/hadoop/share/hadoop/common/lib/*,
/home/hdfs/hadoop/share/hadoop/common/*,
/home/hdfs/hadoop/share/hadoop/hdfs,
/home/hdfs/hadoop/share/hadoop/hdfs/lib/*,
/home/hdfs/hadoop/share/hadoop/hdfs/*,
/home/hdfs/hadoop/share/hadoop/mapreduce/lib/*,
/home/hdfs/hadoop/share/hadoop/mapreduce/*,
/home/hdfs/hadoop/share/hadoop/yarn,
/home/hdfs/hadoop/share/hadoop/yarn/lib/*,
/home/hdfs/hadoop/share/hadoop/yarn/*
</value>
</property>
</configuration>
8. Edit the workers file
vim /home/hdfs/hadoop/etc/hadoop/workers
db1
db2
db3
9. Configure the operating users before starting
#9-1 Add the following parameters at the top of start-dfs.sh and stop-dfs.sh
vim /home/hdfs/hadoop/sbin/start-dfs.sh
vim /home/hdfs/hadoop/sbin/stop-dfs.sh
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
HDFS_JOURNALNODE_USER=root
HDFS_ZKFC_USER=root
#9-2 Add the following parameters at the top of start-yarn.sh and stop-yarn.sh
vim /home/hdfs/hadoop/sbin/start-yarn.sh
vim /home/hdfs/hadoop/sbin/stop-yarn.sh
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
10. Copy the hadoop directory under /home/hdfs and /etc/profile to the other cluster nodes (run from /home/hdfs)
scp -r hadoop db2:/home/hdfs/
scp -r hadoop db3:/home/hdfs/
scp /etc/profile db2:/etc/profile
scp /etc/profile db3:/etc/profile
# refresh the environment on each node
source /etc/profile
11. Start the Hadoop cluster
# Start the JournalNodes: run the following on all three nodes
hdfs --daemon start journalnode
jps
4196 JournalNode
4248 Jps
1964 QuorumPeerMain
# If JournalNode appears in the jps output, it started correctly
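Optionally, confirm the JournalNode RPC port (8485, as referenced by dfs.namenode.shared.edits.dir) is listening on each node; this assumes the ss utility from iproute2 is installed:
ss -lntp | grep 8485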
#11-1 Format HDFS; run on db1 only
hdfs namenode -format
# On success you should see
Storage directory /home/hdfs/hadoop/data/mycluster has been successfully formatted.
#11-2 Formatting generates directories and metadata under the dfs.namenode.name.dir path configured in hdfs-site.xml; then copy the metadata to db2
scp -r /home/hdfs/hadoop/data db2:/home/hdfs/hadoop/
#11-3 Format the ZK failover state; run on db1 only
hdfs zkfc -formatZK
# On success you should see
Successfully created /hadoop-ha/mycluster in ZK.
#11-4 Start HDFS (run on db1)
/home/hdfs/hadoop/sbin/start-dfs.sh
# Error (JAVA_HOME had not been added to /etc/profile here):
ERROR: JAVA_HOME is not set and could not be found.
# Fix: add the variables to /etc/profile
export JAVA_HOME=/usr/java/jdk1.8.0_251-amd64
export PATH=$JAVA_HOME/bin:$PATH
source /etc/profile
# Start again: /home/hdfs/hadoop/sbin/start-dfs.sh
# Error:
ERROR: Refusing to run as root: roo account is not found. Aborting.
# Fix:
# Add the user variables from steps 9-1 and 9-2 to the top of the scripts
# Start again: /home/hdfs/hadoop/sbin/start-dfs.sh
/home/hdfs/hadoop/sbin/start-dfs.sh
Starting namenodes on [db1 db2]
Last login: Tue Sep  3 20:11:05 CST 2024 from db3 on pts/2
Starting datanodes
Last login: Tue Sep  3 20:48:43 CST 2024 on pts/1
Starting journal nodes [db1 db3 db2]
Last login: Tue Sep  3 20:48:46 CST 2024 on pts/1
db1: journalnode is running as process 10824. Stop it first.
Starting ZK Failover Controllers on NN hosts [db1 db2]
Last login: Tue Sep  3 20:49:08 CST 2024 on pts/1
# Check with jps
jps
17841 DataNode
8611 QuorumPeerMain
17685 NameNode
18869 Jps
10824 JournalNode
18329 DFSZKFailoverController
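For an overall view of HDFS (live DataNodes, capacity, under-replicated blocks) you can also run:
hdfs dfsadmin -report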
#11-5 Start YARN
/home/hdfs/hadoop/sbin/start-yarn.sh
# On success
Starting resourcemanagers on [ db1 db2]
Last login: Tue Sep  3 20:49:14 CST 2024 on pts/1
Starting nodemanagers
Last login: Tue Sep  3 21:22:09 CST 2024 on pts/1
# Check with jps
[root@db1 sbin]# jps
17841 DataNode
8611 QuorumPeerMain
20691 NodeManager
17685 NameNode
10824 JournalNode
18329 DFSZKFailoverController
20922 Jps
20540 ResourceManager
# If ResourceManager appears, YARN started successfully
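The registered NodeManagers can also be listed with:
yarn node -list -all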
#11-6-1 Check the state of the cluster components
# On db1 (the active NameNode), check the NameNode states
[root@db1 bin]# hdfs haadmin -getServiceState db1
active
[root@db1 bin]# hdfs haadmin -getServiceState db2
standby
#11-6-2 On db1, check the active/standby state of the RM nodes
[root@db1 bin]# yarn rmadmin -getServiceState rm1
active
[root@db1 bin]# yarn rmadmin -getServiceState rm2
standby
#11-6-3 db1/db2 states before the failover test
[root@db1 hadoop]# hdfs haadmin -getServiceState db1
active
[root@db1 hadoop]# hdfs haadmin -getServiceState db2
standby
#11-6-4 On db1, manually kill the NameNode process; db2 immediately becomes active
[root@db1 hadoop]# jps
17841 DataNode
8611 QuorumPeerMain
20691 NodeManager
17685 NameNode
10824 JournalNode
18329 DFSZKFailoverController
20540 ResourceManager
27020 Jps
[root@db1 hadoop]# kill -9 17685
[root@db1 bin]# hdfs haadmin -getServiceState db2
active
#11-6-5 After killing the NameNode on db1, start it again and check whether db1 switches to standby
[root@db1 bin]# hdfs --daemon start namenode
[root@db1 bin]# hdfs haadmin -getServiceState db1
standby
[root@db1 bin]# hdfs haadmin -getAllServiceState
db1:53310 standby
db2:53310 active
# After restarting the db1 NameNode, db1 itself successfully switches to standby.
# RM active/standby switchover and recovery work in the same way.
#11-6-6 Alternatively, test a forced manual failover. This is acceptable here because it is a test environment; in production, back up the fsimage first, otherwise the active and standby metadata may get out of sync and data may be lost.
hdfs haadmin -transitionToStandby --forcemanual db2
You have specified the --forcemanual flag. This flag is dangerous, as it can induce a split-brain scenario that WILL CORRUPT your HDFS namespace, possibly irrecoverably.
It is recommended not to use this flag, but instead to shut down the cluster and disable automatic failover if you prefer to manually manage your HA state.
You may abort safely by answering 'n' or hitting ^C now.
Are you sure you want to continue? (Y or N)yes
# This forces a NameNode into standby without going through the normal automatic failover flow; both NameNodes may then consider themselves active, producing a split-brain condition that can lead to data inconsistency or even data loss
[root@db1 bin]# hdfs haadmin -getAllServiceState
db1:53310 active
db2:53310 standby
#11-7 Use the ZooKeeper CLI to inspect the znodes created by Hadoop HA and their contents
# Note: the ZK nodes below are znodes, i.e. directory-like paths inside ZooKeeper, not Hadoop nodes (servers).
cd /home/hdfs/zookeeper/bin/
./zkCli.sh
[zk: localhost:2181(CONNECTED) 0] ls /
[hadoop-ha, rmstore, yarn-leader-election, zookeeper]
#11-7-1 Inspect /hadoop-ha: its child mycluster is the nameservice name configured in hdfs-site.xml; with multiple clusters, /hadoop-ha would have multiple children
[zk: localhost:2181(CONNECTED) 2] ls /hadoop-ha
[mycluster]
#11-7-2 Check whether mycluster itself has children: they hold the information about the currently active node
[zk: localhost:2181(CONNECTED) 3] ls /hadoop-ha/mycluster
[ActiveBreadCrumb, ActiveStandbyElectorLock]
# ActiveBreadCrumb is a persistent znode and ActiveStandbyElectorLock an ephemeral one; db1 and db2 both watch the ephemeral znode for deletion events
#11-7-3 Check which node currently holds the active/standby election lock
[zk: localhost:2181(CONNECTED) 5] get /hadoop-ha/mycluster/ActiveStandbyElectorLock
myclusterdb1db1 <binary data>
At this point the Hadoop HA layer that provides the underlying distributed storage is fully in place.
12. Access the web UIs
# YARN ResourceManager
http://db1:8088
# NameNode UI (per dfs.namenode.http-address)
http://db1:50070
IV. HBase HA Deployment (performed on db1)
1. Install HBase
tar -zvxf hbase-2.2.5-bin.tar.gz
mv hbase-2.2.5 /home/hdfs/hbase
2. Configure environment variables (on all three nodes)
vim /etc/profile
export HBASE_HOME=/home/hdfs/hbase
export PATH=.:$HBASE_HOME/bin/:$PATH
source /etc/profile
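After sourcing /etc/profile, the HBase binaries should be on the PATH:
hbase version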
3. Configure hbase-env.sh
vim /home/hdfs/hbase/conf/hbase-env.sh
export JAVA_HOME=/usr/java/jdk1.8.0_251-amd64
export HADOOP_HOME=/home/hdfs/hadoop
export HBASE_HOME=/home/hdfs/hbase
# Do not use the bundled ZooKeeper; use the externally deployed ZooKeeper instead
export HBASE_MANAGES_ZK=false
# Change the PID file directory (default is /tmp)
export HBASE_PID_DIR=/home/hdfs/hbase/pids
mkdir -p /home/hdfs/hbase/pids
4. Configure hbase-site.xml
vim /home/hdfs/hbase/conf/hbase-site.xml
<configuration>
<!-- HDFS nameservice of the Hadoop cluster -->
<property>
<name>hbase.rootdir</name>
<value>hdfs://mycluster/hbase</value>
</property>
<property>
<name>hbase.zookeeper.quorum</name>
<value>db1,db2,db3</value>
</property>
<property>
<name>hbase.zookeeper.property.clientPort</name>
<value>2181</value>
</property>
<!-- Whether the cluster is fully distributed -->
<property>
<name>hbase.cluster.distributed</name>
<value>true</value>
</property>
<!-- Set to false for this fully distributed deployment -->
<property>
<name>hbase.unsafe.stream.capability.enforce</name>
<value>false</value>
</property>
<!-- Path for HBase temporary files -->
<property>
<name>hbase.tmp.dir</name>
<value>/home/hdfs/hbase/data/hbase_tmp</value>
</property>
<!-- Path for ZooKeeper data storage -->
<property>
<name>hbase.zookeeper.property.dataDir</name>
<value>/home/hdfs/hbase/data/zookeeper_data</value>
</property>
</configuration>
mkdir -p /home/hdfs/hbase/data/{hbase_tmp,zookeeper_data}
Note: the hbase.rootdir value in $HBASE_HOME/conf/hbase-site.xml (nameservice/host and port included) must match the fs.defaultFS value in $HADOOP_HOME/etc/hadoop/core-site.xml, i.e. hdfs://mycluster here.
5. Configure regionservers
vim /home/hdfs/hbase/conf/regionservers
db1
db2
db3
6. Configure HMaster HA
To keep the HBase cluster highly available, HBase supports multiple backup masters; when the active master goes down, a backup master automatically takes over the whole cluster.
Create a new file backup-masters under $HBASE_HOME/conf/ and list the nodes that should act as backup masters.
vim /home/hdfs/hbase/conf/backup-masters
db2
scp -r /home/hdfs/hbase db2:/home/hdfs
scp -r /home/hdfs/hbase db3:/home/hdfs
7. Create a symlink to hdfs-site.xml in HBase's conf directory (on all three nodes)
ln -s /home/hdfs/hadoop/etc/hadoop/hdfs-site.xml /home/hdfs/hbase/conf/hdfs-site.xml
This lets HBase pick up HDFS configuration changes; for example, if the HDFS replication factor were changed, HBase would otherwise still operate with the default of 3 copies.
Without this symlink the HBase cluster services may not start properly and some RegionServers can fail to come up.
8. Start the HBase cluster
start-hbase.sh
Startup output (note the SLF4J binding warnings):
[root@db1 conf]# start-hbase.sh
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/home/hdfs/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/home/hdfs/hbase/lib/client-facing-thirdparty/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
running master, logging to /home/hdfs/hbase/logs/hbase-root-master-db1.out
SLF4J: Class path contains multiple SLF4J bindings.
SLF4J: Found binding in [jar:file:/home/hdfs/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: Found binding in [jar:file:/home/hdfs/hbase/lib/client-facing-thirdparty/slf4j-log4j12-1.7.25.jar!/org/slf4j/impl/StaticLoggerBinder.class]
SLF4J: See http://www.slf4j.org/codes.html#multiple_bindings for an explanation.
SLF4J: Actual binding is of type [org.slf4j.impl.Log4jLoggerFactory]
db1: running regionserver, logging to /home/hdfs/hbase/logs/hbase-root-regionserver-db1.out
db3: running regionserver, logging to /home/hdfs/hbase/logs/hbase-root-regionserver-db3.out
db2: running regionserver, logging to /home/hdfs/hbase/logs/hbase-root-regionserver-db2.out
db2: running master, logging to /home/hdfs/hbase/logs/hbase-root-master-db2.out
9. Stop the HBase cluster
stop-hbase.sh
10. Resolve the SLF4J/log4j binding conflict
The warnings are caused by two SLF4J log4j bindings on the classpath, one from Hadoop and one from HBase; removing or renaming one of them on every node is enough.
Fix:
cd /home/hdfs/hbase/lib/client-facing-thirdparty/
[root@db2 client-facing-thirdparty]# ll
total 2108
-rw-r--r--. 1 root root   20437 Sep  4 14:55 audience-annotations-0.5.0.jar
-rw-r--r--. 1 root root   61829 Sep  4 14:55 commons-logging-1.2.jar
-rw-r--r--. 1 root root   15322 Sep  4 14:55 findbugs-annotations-1.3.9-1.jar
-rw-r--r--. 1 root root 1506370 Sep  4 14:55 htrace-core4-4.2.0-incubating.jar
-rw-r--r--. 1 root root  489884 Sep  4 14:55 log4j-1.2.17.jar
-rw-r--r--. 1 root root   41203 Sep  4 14:55 slf4j-api-1.7.25.jar
-rw-r--r--. 1 root root   12244 Sep  4 14:55 slf4j-log4j12-1.7.25.jar
# HBase's slf4j-log4j12-1.7.25.jar conflicts with the copy bundled with Hadoop, so rename HBase's copy
mv slf4j-log4j12-1.7.25.jar slf4j-log4j12-1.7.25.jar.bak
11. Restart HBase
start-hbase.sh
12. HBase Web UI
http://db1:16010
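A quick smoke test from the HBase shell; the table name t1 and column family cf below are arbitrary examples:
hbase shell
# inside the shell:
status
create 't1', 'cf'
put 't1', 'r1', 'cf:a', 'v1'
scan 't1'
disable 't1'
drop 't1'
exit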
13. Final process list
[root@db1 conf]# jps
37024 DataNode
38032 ResourceManager
40224 HRegionServer
8611 QuorumPeerMain
37299 JournalNode
37558 DFSZKFailoverController
38182 NodeManager
36875 NameNode
40044 HMaster
40716 Jps
[root@db2 ~]# jps
25537 NameNode
25857 DFSZKFailoverController
28049 Jps
27763 HMaster
27604 HRegionServer
26186 NodeManager
1964 QuorumPeerMain
25628 DataNode
26109 ResourceManager
25743 JournalNode
[root@db3 ~]# jps
19120 NodeManager
18833 DataNode
20342 HRegionServer
18954 JournalNode
20683 Jps
1951 QuorumPeerMain
V. Cluster Startup and Shutdown:
1. ZooKeeper cluster
/home/hdfs/zookeeper/bin/zkServer.sh start
/home/hdfs/zookeeper/bin/zkServer.sh stop
2. Hadoop (HDFS) cluster
start-dfs.sh
stop-dfs.sh
3. YARN
start-yarn.sh
stop-yarn.sh
4. HBase cluster
start-hbase.sh
stop-hbase.sh
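Start order matters: ZooKeeper first, then HDFS, YARN and finally HBase; shut down in reverse. A minimal sketch driving this from db1 over the passwordless SSH configured earlier, using the paths from this guide:
# start
for h in db1 db2 db3; do ssh $h /home/hdfs/zookeeper/bin/zkServer.sh start; done
/home/hdfs/hadoop/sbin/start-dfs.sh
/home/hdfs/hadoop/sbin/start-yarn.sh
/home/hdfs/hbase/bin/start-hbase.sh
# stop (reverse order)
/home/hdfs/hbase/bin/stop-hbase.sh
/home/hdfs/hadoop/sbin/stop-yarn.sh
/home/hdfs/hadoop/sbin/stop-dfs.sh
for h in db1 db2 db3; do ssh $h /home/hdfs/zookeeper/bin/zkServer.sh stop; done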
VI. Hadoop Failover Commands:
1. Check NameNode service state
hdfs haadmin -getServiceState db1
hdfs haadmin -getServiceState db2
hdfs haadmin -getAllServiceState
2. Check RM active/standby state
yarn rmadmin -getServiceState rm1
yarn rmadmin -getServiceState rm2
yarn rmadmin -getAllServiceState
3. Start a NameNode
hdfs --daemon start namenode
Reference: https://blog.csdn.net/Sara_cloud/article/details/111308142