Hadoop高可用搭建
一、集群规划
四台主机,主机映射如下图
[root@mast conf]# cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
172.20.10.7 mast
172.20.10.4 node1
172.20.10.5 node2
172.20.10.6 node3
二、配置Zookeeper(mast节点上)
配置过程:
1.在mast节点上配置Zookeeper,配置完成后再分发到node1、node2、node3
2.解压Zookeeper安装包及前期准备
# 切换root账号
su
# 解压zookeeper
tar -zxvf zookeeper-3.4.5.tar.gz -C /export/server/
# 创建zookeeper的相关目录
mkdir /export/server/zookeeper-3.4.5/{zkdata,logs}
# 配置环境变量
vi /etc/profile
# 配置Zookeeper
export ZOOKEEPER_HOME=/export/server/zookeeper-3.4.5
export PATH=.:$PATH:$ZOOKEEPER_HOME/bin
# 分发hosts文件到各个节点(node1、node2、node3)
scp /etc/hosts node1:/etc/
scp /etc/hosts node2:/etc/
scp /etc/hosts node3:/etc/
3.配置zoo.cfg文件
[root@mast conf]# cat zoo.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/export/server/zookeeper-3.4.5/zkdata
dataLogDir=/export/server/zookeeper-3.4.5/logs
# the port at which the clients will connect
clientPort=2181
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
# cluster
# server.{id}=host:peerPort:electionPort — {id} must equal the number stored
# in that host's ${dataDir}/myid file (node2 -> 3, node3 -> 4; see step 5)
# NOTE(review): a 2-server ensemble has no fault tolerance — quorum is 2,
# so losing either server stops the service; 3 or more servers recommended
server.3=node2:2888:3888
server.4=node3:2888:3888
4.将配置好的Zookeeper发送到其他节点
# 将环境变量发送到其他节点
scp /etc/profile node1:/etc/
scp /etc/profile node2:/etc/
scp /etc/profile node3:/etc/
# 将配置好的zookeeper发送到其他节点
scp -r zookeeper-3.4.5/ node1:/export/server/
scp -r zookeeper-3.4.5/ node2:/export/server/
scp -r zookeeper-3.4.5/ node3:/export/server/
# 配置zookeeper的myid配置文件
# cluster中的server.{num}=node2:2888:3888 需要与myid中的数字一致
5.创建myid文件
注意:只需要在server结点下面创建myid文件,并且文件的内容就为zoo.cfg里面的几号机器的数字,如下,在node2里面创建的myid内容为3。
vi /export/server/zookeeper-3.4.5/zkdata/myid 添加命令
[root@node2 zkdata]# cat myid
3
[root@node2 zkdata]#
三、配置Hadoop集群
1、修改配置hadoop-env.sh中JDK和Hadoop路径
# The java implementation to use.
export JAVA_HOME=/export/server/jdk1.8.0_171/
# The jsvc implementation to use. Jsvc is required to run secure datanodes
# that bind to privileged ports to provide authentication of data transfer
# protocol. Jsvc is not required if SASL is configured for authentication of
# data transfer protocol using non-privileged ports.
#export JSVC_HOME=${JSVC_HOME}
#export HADOOP_CONF_DIR=${HADOOP_CONF_DIR:-"/etc/hadoop"}
export HADOOP_CONF_DIR=/export/server/hadoop-2.7.3/etc/hadoop
2、修改core-site.xml
<configuration>
<!-- fs.default.name (deprecated) and fs.defaultFS are alternatives; fs.defaultFS is used here.
     The value is the HA nameservice id and must match dfs.nameservices in hdfs-site.xml -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://lh1</value>
<description>HDFS的URL,HA下配置</description>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/export/server/hadoop-2.7.3/tmp</value>
<description>节点上本地的hadoop临时文件夹</description>
</property>
<!-- ZooKeeper quorum used by the ZKFCs for automatic NameNode failover -->
<property>
<name>ha.zookeeper.quorum</name>
<value>node2:2181,node3:2181</value>
<description>指定HDFS HA配置</description>
</property>
</configuration>
3、修改hdfs-site.xml(配置有几个namenode)
<configuration>
<property>
<name>dfs.replication</name>
<value>2</value>
<description>数据块副本数,默认是3,应不大于datanode机器数量</description>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
<description>如果是true则检查权限,否则不检查(每一个人都可以存取文件)</description>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/export/server/hadoop-2.7.3/hdfs/name</value>
<description>namenode上存储hdfs名字空间元数据</description>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/export/server/hadoop-2.7.3/hdfs/data</value>
<description>datanode上数据块的物理存储位置</description>
</property>
<!-- HDFS HA settings below -->
<!-- nameservice id "lh1" must match fs.defaultFS in core-site.xml -->
<property>
<name>dfs.nameservices</name>
<value>lh1</value>
<description>hdfs的nameservice为lh1,需要和core-site.xml保持一致</description>
</property>
<property>
<name>dfs.ha.namenodes.lh1</name>
<value>nn1,nn2</value>
<description>lh1集群中两个namenode的名字</description>
</property>
<property>
<name>dfs.namenode.rpc-address.lh1.nn1</name>
<value>node1:9000</value>
<description>nn1的RPC通信地址</description>
</property>
<property>
<name>dfs.namenode.http-address.lh1.nn1</name>
<value>node1:50070</value>
<description>nn1的http通信地址</description>
</property>
<property>
<name>dfs.namenode.rpc-address.lh1.nn2</name>
<value>mast:9000</value>
<description>nn2的RPC通信地址</description>
</property>
<property>
<name>dfs.namenode.http-address.lh1.nn2</name>
<value>mast:50070</value>
<description>nn2的http通信地址</description>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://node2:8485;node3:8485/lh1</value>
<description>指定NameNode的元数据在JournalNode上的存放位置</description>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/export/server/hadoop-2.7.3/hadoopdatas/journal</value>
<description>JournalNode上元数据和日志文件存放位置</description>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled.lh1</name>
<value>true</value>
<description>开启Namenode失败自动切换</description>
</property>
<property>
<name>dfs.client.failover.proxy.provider.lh1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
<description>配置失败时切换实现方式</description>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>
sshfence
shell(/bin/true)
</value>
<description>隔离机制,多个机制换行分割,每个机制一行</description>
</property>
<!-- fixed: the key file was misspelled "id_ras"; sshfence would always fail
     with a nonexistent private key, blocking automatic failover -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
<description>sshfence隔离机制需要ssh免密登录</description>
</property>
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
<description>sshfence隔离机制超时时间</description>
</property>
</configuration>
4、修改mapred-site.xml
<!-- Deprecated earlier version of mapred-site.xml, kept for reference only.
     The original text had a dangling comment terminator after </configuration>
     with no opening marker, and two root <configuration> elements in one file
     would be invalid XML; the legacy block is now properly commented out.
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
<description>指定mapreduce使用yarn框架</description>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>mast:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>mast:19888</value>
</property>
</configuration>
-->
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
<description>指定mapreduce使用yarn框架</description>
</property>
<!-- MapReduce JobHistory Server RPC address, default port 10020;
     0.0.0.0 binds every interface of whichever host runs the server -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>0.0.0.0:10020</value>
</property>
<!-- MapReduce JobHistory Server web UI address, default port 19888 -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>0.0.0.0:19888</value>
</property>
</configuration>
5、修改yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<!-- NOTE(review): twelve property names below were misspelled
     "yarn.resourcemaneger.*" in the original; YARN silently ignores unknown
     keys, so all those addresses and the recovery flags fell back to
     defaults. All names are corrected to "yarn.resourcemanager.*". -->
<!-- enable log aggregation -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- keep aggregated logs for 7 days -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
<!-- log server URL must be a host clients can resolve, not 0.0.0.0;
     the JobHistory server is started on node1 (see startup section) -->
<property>
<name>yarn.log.server.url</name>
<value>http://node1:19888/jobhistory/logs</value>
</property>
<!-- enable ResourceManager HA -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- RM cluster id -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>lq</value>
</property>
<!-- logical ids of the two ResourceManagers -->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!-- hostname of each ResourceManager -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>node1</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>mast</value>
</property>
<!-- automatic RM failover; the original used the nonexistent key
     "ha.automatic-failover.recover.enabled" -->
<property>
<name>yarn.resourcemanager.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- recover RM state after restart/failover -->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<!-- store RM state in ZooKeeper; recommended store when HA is enabled -->
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<!-- scheduler address of each RM -->
<property>
<name>yarn.resourcemanager.scheduler.address.rm1</name>
<value>node1:8030</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm2</name>
<value>mast:8030</value>
</property>
<!-- NodeManagers report to the RM via this address -->
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm1</name>
<value>node1:8031</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm2</name>
<value>mast:8031</value>
</property>
<!-- clients submit applications via this address -->
<property>
<name>yarn.resourcemanager.address.rm1</name>
<value>node1:8032</value>
</property>
<property>
<name>yarn.resourcemanager.address.rm2</name>
<value>mast:8032</value>
</property>
<!-- admin commands are sent to the RM via this address -->
<property>
<name>yarn.resourcemanager.admin.address.rm1</name>
<value>node1:8033</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm2</name>
<value>mast:8033</value>
</property>
<!-- RM web UI address for viewing cluster state -->
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>node1:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>mast:8088</value>
</property>
<!-- ZooKeeper quorum used for RM leader election and state storage -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>node2:2181,node3:2181</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
6、修改slaves
[root@node2 hadoop]# cat slaves
node2
node3
7、配置zookeeper环境变量(若第二步已在各节点配置过,此步可跳过)
vim /etc/profile
# 配置Zookeeper
export ZOOKEEPER_HOME=/export/server/zookeeper-3.4.5
export PATH=.:$PATH:$ZOOKEEPER_HOME/bin
8、将配置好的hadoop拷贝到其他节点
# 环境变量
scp /etc/profile node1:/etc/
scp /etc/profile node2:/etc/
scp /etc/profile node3:/etc/
# hadoop
scp -r /export/server/hadoop-2.7.3/ node1:/export/server/
scp -r /export/server/hadoop-2.7.3/ node2:/export/server/
scp -r /export/server/hadoop-2.7.3/ node3:/export/server/
四、启动Zookeeper集群
启动zookeeper(node2,node3)
# 启动
zkServer.sh start
# 验证是否启动成功,两台虚拟机中应为一台leader、一台follower
# leader由选举产生(达到法定数后由zxid/myid较大者当选),并非谁先启动谁是leader
zkServer.sh status
格式化ZKFC(在mast上执行)
hdfs zkfc -formatZK
启动journalnode(分别在node2,node3上执行)
hadoop-daemon.sh start journalnode
五、格式化HDFS(mast上执行)
# 1.格式化目录
hdfs namenode -format
# 2.将格式化之后的mast节点hadoop工作目录中的元数据目录复制到node1节点
scp -r /export/server/hadoop-2.7.3/hadoopdatas/ node1:/export/server/hadoop-2.7.3/
# 3. 初始化完毕之后可以关闭journalnode(分别在node2,node3上执行)(之后在ActiveNN上启动dfs会随之启动全部的journalnode)
hadoop-daemon.sh stop journalnode
六、启动集群
# mast上启动
start-all.sh
# or
start-dfs.sh
start-yarn.sh
#(分别在node2,node3上执行)节点启动Zookeeper
zkServer.sh start
# mast,node1上分别启动zkfc
hadoop-daemon.sh start zkfc
# mast,node1上启动resourcemanager
yarn-daemon.sh start resourcemanager
# node1上启动historyserver
mr-jobhistory-daemon.sh start historyserver
注意:如果mast不能够登录到网站,则说明namenode没有起来,则需要手动起来,所以需要在mast虚拟机上执行命令:
hadoop-daemon.sh start namenode
七、关闭集群
# mast关闭机器
stop-all.sh
# or
stop-dfs.sh
stop-yarn.sh
# node1 ResourceManager、historyserver关闭
yarn-daemon.sh stop resourcemanager
mr-jobhistory-daemon.sh stop historyserver
# mast,node1关闭zkfc
hadoop-daemon.sh stop zkfc
# (分别在node2,node3上执行)节点关闭Zookeeper
zkServer.sh stop
八、测试
在mast节点上执行
hadoop jar /export/server/hadoop-2.7.3/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar pi 10 10
如下图测试