Big Data Hadoop – Fully Distributed and HA Setup
I. Fully Distributed Environment: Passwordless SSH and Time Synchronization
1. Cluster time synchronization
(1) Simulate an intranet environment: pick one server in the cluster to act as the time server.
Here hadoop02 is the time server, and hadoop03 and hadoop04 synchronize against it.
(2) Check the ntpd time service on Linux (only the time server needs ntpd running):
# Check the ntpd status
[root@hadoop02 proc]# service ntpd status
ntpd (pid 964) is running...
# Set ntpd to start at boot (on the time server)
[root@hadoop02 proc]# chkconfig ntpd on
[root@hadoop02 proc]# chkconfig | grep ntpd
ntpd 0:off 1:off 2:on 3:on 4:on 5:on 6:off
ntpdate 0:off 1:off 2:off 3:off 4:off 5:off 6:off
(3) Modify the system configuration
[root@hadoop02 proc]# vi /etc/ntp.conf
# Change 1: restrict access to your own subnet
# Hosts on local network are less restricted.
restrict 192.168.159.0 mask 255.255.255.0 nomodify notrap
# Change 2: in an intranet-only environment the public pool servers are not needed; comment them out.
# Use public servers from the pool.ntp.org project.
# Please consider joining the pool (http://www.pool.ntp.org/join.html).
#server 0.centos.pool.ntp.org iburst
#server 1.centos.pool.ntp.org iburst
#server 2.centos.pool.ntp.org iburst
#server 3.centos.pool.ntp.org iburst
# Change 3: add the local clock as a time source
server 127.127.1.0
fudge 127.127.1.0 stratum 10
# Restart the ntpd service
[root@hadoop02 proc]# service ntpd restart
Shutting down ntpd: [ OK ]
Starting ntpd: [ OK ]
(4) Synchronize the clocks
The time server synchronizes against an upstream public NTP server (here ntp.sjtu.edu.cn)
[root@hadoop02 proc]# ntpdate -u ntp.sjtu.edu.cn
8 Jun 08:17:24 ntpdate[1148]: adjust time server 120.25.115.20 offset -0.001159 sec
The other nodes synchronize against the time server
[root@hadoop03 proc]# service ntpd stop
Shutting down ntpd: [ OK ]
[root@hadoop03 proc]# ntpdate hadoop02
8 Jun 08:21:18 ntpdate[1137]: adjust time server 192.168.159.122 offset 0.005546 sec
[root@hadoop04 proc]# service ntpd stop
Shutting down ntpd: [ OK ]
[root@hadoop04 proc]# ntpdate hadoop02
8 Jun 08:21:18 ntpdate[1137]: adjust time server 192.168.159.122 offset -0.003125 sec
(5) Schedule the other nodes to sync with the time server periodically
[root@hadoop03 proc]# crontab -e
no crontab for root - using an empty one
crontab: installing new crontab
#synchronize time with time server
0-59/10 * * * * /usr/sbin/ntpdate hadoop02
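To confirm the entry is installed and that the offset stays small, a quick check on hadoop03 and hadoop04 (a small sketch; ntpdate -q only queries the offset without adjusting the clock):
# List the installed cron entry
crontab -l
# Query the current offset against the time server without changing anything
ntpdate -q hadoop02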
II. Hadoop cluster deployment
Fully distributed service layout

| hadoop02 | hadoop03 | hadoop04 |
|---|---|---|
| namenode | resourcemanager | secondarynamenode |
| datanode | datanode | datanode |
| nodemanager | nodemanager | nodemanager |
| | | historyserver |
1. Clear the old log files (this builds on a previously configured Hadoop installation)
[root@hadoop02 hadoop-2.7.3]# ls
bin etc include lib libexec LICENSE.txt logs native-2.7.3-snappy.tar.gz NOTICE.txt README.txt sbin share
[root@hadoop02 hadoop-2.7.3]# rm -rf logs/
[root@hadoop02 hadoop-2.7.3]# ls
bin etc include lib libexec LICENSE.txt native-2.7.3-snappy.tar.gz NOTICE.txt README.txt sbin share
2. Modify the configuration files
<!--vi core-site.xml-->
---------------------------------------------------------------------------------------------------
<configuration>
<!-- Access entry point of the NameNode (master) -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop02:8020</value>
</property>
<!-- Directory where metadata is stored -->
<property>
<name>hadoop.tmp.dir</name>
<value>/var/data/hadoop/full</value>
</property>
</configuration>
<!--vi hdfs-site.xml-->
---------------------------------------------------------------------------------------------------
<configuration>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hadoop04:50090</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<!-- Static user for the web UI -->
<property>
<name>hadoop.http.staticuser.user</name>
<value>root</value>
</property>
</configuration>
#vi slaves
#-------------------------------------------------------------------------------------------------#
hadoop02
hadoop03
hadoop04
<!--vi mapred-site.xml-->
---------------------------------------------------------------------------------------------------
<configuration>
<property>
<!-- Runtime framework for executing MapReduce jobs: local, classic, or yarn. -->
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- Do not change the port numbers below arbitrarily -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop04:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop04:19888</value>
</property>
</configuration>
<!--vi yarn-site.xml-->
---------------------------------------------------------------------------------------------------
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<!-- Use the shuffle auxiliary service for MapReduce -->
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<!-- Hostname of the node running the ResourceManager -->
<name>yarn.resourcemanager.hostname</name>
<value>hadoop03</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>106800</value>
</property>
</configuration>
#vi hadoop-env.sh
---------------------------------------------------------------------------------------------------
# The java implementation to use.
export JAVA_HOME=/usr/java/jdk1.8.0_211-amd64
#vi mapred-env.sh
---------------------------------------------------------------------------------------------------
export JAVA_HOME=/usr/java/jdk1.8.0_211-amd64
#vi yarn-env.sh
---------------------------------------------------------------------------------------------------
# some Java parameters
export JAVA_HOME=/usr/java/jdk1.8.0_211-amd64
Note: the dashed lines above only separate the editor commands from the file contents; the commands themselves are written as comments.
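A quick sanity check that the edited files are actually being picked up (a small sketch using the standard getconf tool; run it on hadoop02 with HADOOP_HOME/bin on the PATH):
# Should print hdfs://hadoop02:8020
hdfs getconf -confKey fs.defaultFS
# Should print 3
hdfs getconf -confKey dfs.replication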
3. Configure passwordless SSH between the cluster nodes
# Generate a key pair
[root@hadoop02 ~]# ssh-keygen -t rsa -P ''
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa):
Created directory '/root/.ssh'.
Your identification has been saved in /root/.ssh/id_rsa.
Your public key has been saved in /root/.ssh/id_rsa.pub.
The key fingerprint is:
a7:d7:e3:80:a4:12:a5:a0:3f:c2:a7:9d:88:79:52:df root@hadoop02
The key's randomart image is:
+--[ RSA 2048]----+
| |
| |
| . . |
| . . o |
|. o S . |
|... . o + . |
|.ooo... o o o |
|oo*.o.E . o . |
|o= o . |
+-----------------+
[root@hadoop02 ~]# ssh-copy-id hadoop02
[root@hadoop02 ~]# ssh-copy-id hadoop03
[root@hadoop02 ~]# ssh-copy-id hadoop04
With that, hadoop02 can log in to every node without a password. Repeat the key generation and ssh-copy-id steps on hadoop03 and hadoop04 so that every node can reach the others; a compact loop is sketched below.
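A minimal sketch of the same distribution as a loop (it assumes ssh-keygen has already been run on the node executing it):
# Copy this node's public key to every cluster member
for host in hadoop02 hadoop03 hadoop04; do
  ssh-copy-id "$host"
done
# Verify: should print the remote hostname without asking for a password
ssh hadoop03 hostname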
4. Copy the configured Hadoop to the other nodes and start the cluster
[root@hadoop02 apache]# scp -r hadoop-2.7.3/ hadoop03:`pwd`
[root@hadoop02 apache]# scp -r hadoop-2.7.3/ hadoop04:`pwd`
Start the cluster
#1. Format the NameNode on the master node
[root@hadoop02 hadoop-2.7.3]# hdfs namenode -format
#2. Start the NameNode, DataNodes, SecondaryNameNode, and NodeManagers across the cluster
[root@hadoop02 apache]# start-all.sh
This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh
Starting namenodes on [hadoop02]
hadoop02: starting namenode, logging to /opt/apache/hadoop-2.7.3/logs/hadoop-root-namenode-hadoop02.out
hadoop03: starting datanode, logging to /opt/apache/hadoop-2.7.3/logs/hadoop-root-datanode-hadoop03.out
hadoop04: starting datanode, logging to /opt/apache/hadoop-2.7.3/logs/hadoop-root-datanode-hadoop04.out
hadoop02: starting datanode, logging to /opt/apache/hadoop-2.7.3/logs/hadoop-root-datanode-hadoop02.out
Starting secondary namenodes [hadoop04]
hadoop04: starting secondarynamenode, logging to /opt/apache/hadoop-2.7.3/logs/hadoop-root-secondarynamenode-hadoop04.out
starting yarn daemons
starting resourcemanager, logging to /opt/apache/hadoop-2.7.3/logs/yarn-root-resourcemanager-hadoop02.out
hadoop04: starting nodemanager, logging to /opt/apache/hadoop-2.7.3/logs/yarn-root-nodemanager-hadoop04.out
hadoop03: starting nodemanager, logging to /opt/apache/hadoop-2.7.3/logs/yarn-root-nodemanager-hadoop03.out
hadoop02: starting nodemanager, logging to /opt/apache/hadoop-2.7.3/logs/yarn-root-nodemanager-hadoop02.out
#3. Start the ResourceManager on hadoop03 (where it is planned to run)
[root@hadoop03 hadoop]# yarn-daemon.sh start resourcemanager
starting resourcemanager, logging to /opt/apache/hadoop-2.7.3/logs/yarn-root-resourcemanager-hadoop03.out
[root@hadoop03 hadoop]# jps
2768 ResourceManager
2546 DataNode
2642 NodeManager
2982 Jps
#4. Start the JobHistoryServer
[root@hadoop04 hadoop]# mr-jobhistory-daemon.sh start historyserver
starting historyserver, logging to /opt/apache/hadoop-2.7.3/logs/mapred-root-historyserver-hadoop04.out
[root@hadoop04 hadoop]# jps
3239 JobHistoryServer
2938 DataNode
3099 NodeManager
3276 Jps
3036 SecondaryNameNode
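Before shutting everything down, a quick smoke test confirms that jobs actually run end to end (a sketch; the examples jar ships with the Hadoop 2.7.3 distribution):
# Submit the bundled pi estimator: 2 map tasks, 10 samples each
[root@hadoop02 ~]# hadoop jar /opt/apache/hadoop-2.7.3/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar pi 2 10
# The job should show up on the ResourceManager UI (hadoop03:8088) and,
# once finished, on the JobHistoryServer UI (hadoop04:19888)
To shut the whole cluster down again from hadoop02: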
[root@hadoop02 ~]# stop-all.sh
This script is Deprecated. Instead use stop-dfs.sh and stop-yarn.sh
Stopping namenodes on [hadoop02]
hadoop02: stopping namenode
hadoop04: stopping datanode
hadoop03: stopping datanode
hadoop02: stopping datanode
Stopping secondary namenodes [hadoop04]
hadoop04: stopping secondarynamenode
stopping yarn daemons
no resourcemanager to stop
hadoop03: stopping nodemanager
hadoop04: stopping nodemanager
hadoop02: stopping nodemanager
no proxyserver to stop
[root@hadoop02 ~]#
III. Hadoop high-availability (HA) cluster setup
1. Only one NameNode may serve clients at a time; several NameNodes serving simultaneously would cause split-brain (hence the Active/Standby roles);
2. The metadata of all NameNodes must stay consistent (JournalNodes take over the role of the SecondaryNameNode);
3. There must be a coordinator so that clients know which NameNode is currently serving (ZooKeeper).
Zookeeper
Roles
Leader: initiates votes, drives decisions, and updates the system state.
Follower: accepts client requests, returns results to clients, and votes during leader election.
Observer: accepts client connections and forwards write requests to the leader, but does not take part in voting; it only mirrors the leader's state. Observers exist to scale the ensemble and improve read throughput.
Client: the party that issues requests.
Typical uses:
In an HA architecture, it records which machine is Active (see the zkCli sketch below);
In HBase, it serves as the request entry point for the cluster;
In Kafka, it records consumer and producer information, including offsets.
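A concrete look at the first use case: once the HA cluster built later in this part is running, the ZKFC keeps its election data under /hadoop-ha in ZooKeeper. A minimal sketch, assuming the nameservice is called ns1 as configured below:
# Connect with the ZooKeeper CLI (any quorum member works)
zkCli.sh -server hadoop02:2181
# Inside the shell:
#   ls /hadoop-ha/ns1                      -> ActiveBreadCrumb, ActiveStandbyElectorLock
#   get /hadoop-ha/ns1/ActiveBreadCrumb    -> shows which NameNode is currently Active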
Single-node ZooKeeper setup
#1. Extract the archive
[root@hadoop02 cdh]# tar xvf zookeeper-3.4.5-cdh5.3.6.tar.gz -C /opt/cdh/
#2. Edit the configuration file
[root@hadoop02 conf]# ls
configuration.xsl log4j.properties zoo_sample.cfg
[root@hadoop02 conf]# cp zoo_sample.cfg zoo.cfg
[root@hadoop02 conf]# vi zoo.cfg
# Directory where ZooKeeper stores its local data
dataDir=/var/ha/zk
# Add the ZooKeeper environment variables
vi /etc/profile
export ZOOKEEPER_HOME=/opt/cdh/zookeeper-3.4.5-cdh5.3.6
export PATH=$PATH:$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$ZOOKEEPER_HOME/bin
# Start ZooKeeper
source /etc/profile
zkServer.sh start
# The local loopback entry 127.0.0.1 localhost must be configured (in /etc/hosts)
[root@hadoop02 zookeeper-3.4.5-cdh5.3.6]# zkServer.sh status
JMX enabled by default
Using config: /opt/cdh/zookeeper-3.4.5-cdh5.3.6/bin/../conf/zoo.cfg
Mode: standalone
[root@hadoop02 zookeeper-3.4.5-cdh5.3.6]# jps
1184 QuorumPeerMain
1229 Jps
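A quick connectivity check against the standalone instance, using the CLI that ships with ZooKeeper (a small sketch):
# Connect to the local ZooKeeper
zkCli.sh -server 127.0.0.1:2181
# Inside the shell: ls /   -> a fresh instance shows at least [zookeeper]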
Distributed ZooKeeper setup
Edit the ZooKeeper configuration file
vi zoo.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/var/ha/zk
# the port at which the clients will connect
clientPort=2181
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
server.1=hadoop02:2888:3888
server.2=hadoop03:2888:3888
server.3=hadoop04:2888:3888
2181: the client port;
2888: the port for internal communication between ZK nodes;
3888: the port used for leader election.
Under dataDir=/var/ha/zk, create a file named myid (the name must not be changed) and write the matching id into it, e.g. 1 for server.1 (do this on every ZK node).
[root@hadoop02 zookeeper-3.4.5-cdh5.3.6]# mkdir -p /var/ha/zk
[root@hadoop02 zookeeper-3.4.5-cdh5.3.6]# vi /var/ha/zk/myid
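The ids follow the server.N entries in zoo.cfg: hadoop02 is server.1, hadoop03 server.2, hadoop04 server.3. Writing them with echo is equivalent to editing the file in vi (a sketch; create the directory on each node first):
# On hadoop02
echo 1 > /var/ha/zk/myid
# On hadoop03
mkdir -p /var/ha/zk && echo 2 > /var/ha/zk/myid
# On hadoop04
mkdir -p /var/ha/zk && echo 3 > /var/ha/zk/myid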
Copy the configured ZooKeeper to the corresponding directory on the other nodes
[root@hadoop02 opt]# scp -r cdh/ hadoop03:`pwd`
[root@hadoop02 opt]# scp -r cdh/ hadoop04:`pwd`
Start ZooKeeper on each node
zkServer.sh start
[root@hadoop02 ~]# zkServer.sh status
JMX enabled by default
Using config: /opt/cdh/zookeeper-3.4.5-cdh5.3.6/bin/../conf/zoo.cfg
Mode: leader
[root@hadoop03 ~]# zkServer.sh status
JMX enabled by default
Using config: /opt/cdh/zookeeper-3.4.5-cdh5.3.6/bin/../conf/zoo.cfg
Mode: follower
[root@hadoop04 ~]# zkServer.sh status
JMX enabled by default
Using config: /opt/cdh/zookeeper-3.4.5-cdh5.3.6/bin/../conf/zoo.cfg
Mode: follower
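To double-check the ports described above, look at what each node is actually listening on (a sketch; 2181 and 3888 should be open on every node, while 2888 is normally bound only on the current leader):
# Show the ZooKeeper listeners on this node
netstat -tlnp | grep -E '2181|2888|3888'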
HA setup
Node layout
hadoop02 | hadoop03 | hadoop04 |
---|---|---|
NameNode | NameNode | ResourceManager |
ZKFC | ZKFC | DataNode |
DataNode | DataNode | JournalNode |
JournalNode | JournalNode | NodeManager |
NodeManager | NodeManager | Zookeeper |
Zookeeper | Zookeeper | historyserver |
Manual failover
Configure Hadoop
1.vi core-site.xml
<configuration>
<!-- Unified entry point of the HA cluster: the logical nameservice, not a single host -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://ns1</value>
</property>
<!-- Directory where metadata is stored -->
<property>
<name>hadoop.tmp.dir</name>
<value>/var/ha/cdh</value>
</property>
<property>
<name>hadoop.http.staticuser.user</name>
<value>root</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>hadoop02:2181,hadoop03:2181,hadoop04:2181</value>
</property>
</configuration>
2.vi hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.nameservices</name>
<value>ns1</value>
</property>
<property>
<name>dfs.blocksize</name>
<value>134217728</value>
</property>
<property>
<name>dfs.ha.namenodes.ns1</name>
<value>nn1,nn2</value>
</property>
<!-- RPC address of nn1 (the host nn1 runs on) -->
<property>
<name>dfs.namenode.rpc-address.ns1.nn1</name>
<value>hadoop02:8020</value>
</property>
<!-- HTTP address of nn1 (external/web access) -->
<property>
<name>dfs.namenode.http-address.ns1.nn1</name>
<value>hadoop02:50070</value>
</property>
<!-- RPC address of nn2 (the host nn2 runs on) -->
<property>
<name>dfs.namenode.rpc-address.ns1.nn2</name>
<value>hadoop03:8020</value>
</property>
<!-- HTTP address of nn2 (external/web access) -->
<property>
<name>dfs.namenode.http-address.ns1.nn2</name>
<value>hadoop03:50070</value>
</property>
<!-- Where the NameNodes' shared edit log is stored on the JournalNodes (usually co-located with ZooKeeper) -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://hadoop02:8485;hadoop03:8485;hadoop04:8485/ns1</value>
</property>
<!-- Local directory where each JournalNode stores its data -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/opt/ha/journal</value>
</property>
<!-- Proxy provider through which clients locate the active NameNode -->
<property>
<name>dfs.client.failover.proxy.provider.ns1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Fencing method used during failover -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<!-- Passwordless SSH is only required when the sshfence mechanism is used -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<!-- sshfence connection timeout; can be omitted when a script-based fencing method is used -->
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
<!-- Automatic failover is enabled later, in the "Configure automatic failover" section -->
</configuration>
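With this in place, clients address HDFS through the logical nameservice instead of a fixed host, and the proxy provider resolves whichever NameNode is currently Active (a small sketch, to be run once the HA cluster is up):
# Both commands work regardless of which NameNode is Active
hdfs dfs -mkdir -p hdfs://ns1/tmp/ha-test
hdfs dfs -ls hdfs://ns1/tmp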
3.vi mapred-site.xml
<configuration>
<property>
<!-- Runtime framework for executing MapReduce jobs: local, classic, or yarn. -->
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- Do not change the port numbers below arbitrarily -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop04:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop04:19888</value>
</property>
</configuration>
4.vi yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<!-- Use the shuffle auxiliary service for MapReduce -->
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<!-- Hostname of the node running the ResourceManager -->
<name>yarn.resourcemanager.hostname</name>
<value>hadoop04</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>106800</value>
</property>
</configuration>
5.vi slaves
hadoop02
hadoop03
hadoop04
6. Set JAVA_HOME in hadoop-env.sh, mapred-env.sh, and yarn-env.sh
7. Distribute the configured Hadoop to the other nodes
[root@hadoop02 cdh]# scp -r hadoop-2.5.0-cdh5.3.6/ hadoop03:`pwd`
[root@hadoop02 cdh]# scp -r hadoop-2.5.0-cdh5.3.6/ hadoop04:`pwd`
8. Start ZooKeeper (on all three nodes)
[root@hadoop04 ~]# zkServer.sh start
JMX enabled by default
Using config: /opt/cdh/zookeeper-3.4.5-cdh5.3.6/bin/../conf/zoo.cfg
Starting zookeeper ... STARTED
[root@hadoop04 ~]# zkServer.sh status
JMX enabled by default
Using config: /opt/cdh/zookeeper-3.4.5-cdh5.3.6/bin/../conf/zoo.cfg
Mode: follower
9. Start a JournalNode on all three machines
[root@hadoop02 ~]# hadoop-daemon.sh start journalnode
starting journalnode, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-journalnode-hadoop02.out
10. Format the filesystem and start the NameNode (only one NameNode needs to be formatted)
[root@hadoop02 ~]# hdfs namenode -format
INFO common.Storage: Storage directory /var/ha/cdh/dfs/name has been successfully formatted.
[root@hadoop02 ~]# hadoop-daemon.sh start namenode
starting namenode, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-namenode-hadoop02.out
[root@hadoop02 ~]# jps
1154 QuorumPeerMain
1412 Jps
1342 NameNode
1230 JournalNode
11. Bootstrap the second NameNode from the first and start it
[root@hadoop03 ~]# hdfs namenode -bootstrapStandby
INFO common.Storage: Storage directory /var/ha/cdh/dfs/name has been successfully formatted.
[root@hadoop03 ~]# hadoop-daemon.sh start namenode
starting namenode, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-namenode-hadoop03.out
[root@hadoop03 ~]# jps
1408 Jps
1146 QuorumPeerMain
1338 NameNode
1228 JournalNode
Visit hadoop02:50070 and hadoop03:50070 in a browser; with manual failover, both NameNodes initially come up in the standby state.
12. Manually switch the NameNode state
# Transition nn1 (hadoop02) to Active
[root@hadoop02 ~]# hdfs haadmin -transitionToActive nn1
19/06/08 15:26:37 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
# Transition nn1 (hadoop02) to Standby
[root@hadoop02 ~]# hdfs haadmin -transitionToStandby nn1
19/06/08 15:32:47 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
You can add --forceactive to the transitionToActive command above to force the switch to Active.
# Check the NameNode states
[root@hadoop02 ~]# hdfs haadmin -getServiceState nn1
19/06/08 15:38:40 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
active
[root@hadoop02 ~]# hdfs haadmin -getServiceState nn2
19/06/08 15:38:49 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
standby
Configure automatic failover
1. Shut the cluster down completely
[root@hadoop02 ~]# stop-all.sh
2. Add the following to hdfs-site.xml
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
3. Format the ZooKeeper state; this creates a hadoop-ha znode in ZooKeeper
[root@hadoop02 hadoop]# hdfs zkfc -formatZK
INFO ha.ActiveStandbyElector: Successfully created /hadoop-ha/ns1 in ZK.
[zk: localhost:2181(CONNECTED) 0] ls /
[zookeeper, hadoop-ha]
4. Start the cluster
[root@hadoop02 hadoop]# start-all.sh
This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh
19/06/08 15:51:23 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Starting namenodes on [hadoop02 hadoop03]
hadoop02: starting namenode, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-namenode-hadoop02.out
hadoop03: starting namenode, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-namenode-hadoop03.out
hadoop03: starting datanode, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-datanode-hadoop03.out
hadoop04: starting datanode, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-datanode-hadoop04.out
hadoop02: starting datanode, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-datanode-hadoop02.out
Starting journal nodes [hadoop02 hadoop03 hadoop04]
hadoop04: starting journalnode, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-journalnode-hadoop04.out
hadoop03: starting journalnode, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-journalnode-hadoop03.out
hadoop02: starting journalnode, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-journalnode-hadoop02.out
19/06/08 15:51:39 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Starting ZK Failover Controllers on NN hosts [hadoop02 hadoop03]
hadoop03: starting zkfc, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-zkfc-hadoop03.out
hadoop02: starting zkfc, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-zkfc-hadoop02.out
starting yarn daemons
starting resourcemanager, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/yarn-root-resourcemanager-hadoop02.out
hadoop03: starting nodemanager, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/yarn-root-nodemanager-hadoop03.out
hadoop04: starting nodemanager, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/yarn-root-nodemanager-hadoop04.out
hadoop02: starting nodemanager, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/yarn-root-nodemanager-hadoop02.out
[root@hadoop02 hadoop]# jps
4384 Jps
1154 QuorumPeerMain
4243 NodeManager
3620 NameNode
3881 JournalNode
3711 DataNode
4063 DFSZKFailoverController
[root@hadoop03 hadoop]# jps
2976 DFSZKFailoverController
3041 NodeManager
3154 Jps
2853 JournalNode
2775 DataNode
2714 NameNode
1146 QuorumPeerMain
[root@hadoop04 hadoop]# jps
2128 JournalNode
2050 DataNode
1146 QuorumPeerMain
2205 NodeManager
2318 Jps
5. Start the ResourceManager
[root@hadoop04 hadoop]# yarn-daemon.sh start resourcemanager
starting resourcemanager, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/yarn-root-resourcemanager-hadoop04.out
[root@hadoop04 hadoop]# jps
2128 JournalNode
2050 DataNode
2579 Jps
2344 ResourceManager
1146 QuorumPeerMain
2205 NodeManager
6. Start the JobHistoryServer
[root@hadoop04 hadoop]# mr-jobhistory-daemon.sh start historyserver
starting historyserver, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/mapred-root-historyserver-hadoop04.out
[root@hadoop04 hadoop]# jps
2608 JobHistoryServer
2128 JournalNode
2641 Jps
2050 DataNode
2344 ResourceManager
1146 QuorumPeerMain
2205 NodeManager
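With automatic failover enabled it is worth verifying that the standby really takes over. A minimal test, assuming nn1 on hadoop02 is currently Active (a sketch; replace <NameNode-pid> with the pid shown by jps):
# Kill the Active NameNode on hadoop02
[root@hadoop02 ~]# jps | grep NameNode
[root@hadoop02 ~]# kill -9 <NameNode-pid>
# Within a few seconds the ZKFC should promote nn2; confirm from any node:
[root@hadoop02 ~]# hdfs haadmin -getServiceState nn2     # expected: active
# Bring the killed NameNode back; it should rejoin as standby
[root@hadoop02 ~]# hadoop-daemon.sh start namenode
[root@hadoop02 ~]# hdfs haadmin -getServiceState nn1     # expected: standby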
ResourceManager HA
1.vi yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<!-- Use the shuffle auxiliary service for MapReduce -->
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>106800</value>
</property>
<property>
<!-- Enable ResourceManager HA -->
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- Declare the two ResourceManagers -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>rmcluster</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>hadoop03</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>hadoop04</value>
</property>
<!-- ZooKeeper quorum address -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>hadoop02:2181,hadoop03:2181,hadoop04:2181</value>
</property>
<!-- Keep RM state in ZooKeeper so that running jobs can be recovered if a ResourceManager fails mid-run (recovery itself is governed by yarn.resourcemanager.recovery.enabled, which defaults to false) -->
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
</configuration>
2. Start the cluster
[root@hadoop02 hadoop]# start-all.sh
This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh
19/06/08 16:35:17 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Starting namenodes on [hadoop02 hadoop03]
hadoop02: starting namenode, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-namenode-hadoop02.out
hadoop03: starting namenode, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-namenode-hadoop03.out
hadoop02: starting datanode, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-datanode-hadoop02.out
hadoop03: starting datanode, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-datanode-hadoop03.out
hadoop04: starting datanode, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-datanode-hadoop04.out
Starting journal nodes [hadoop02 hadoop03 hadoop04]
hadoop04: starting journalnode, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-journalnode-hadoop04.out
hadoop02: starting journalnode, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-journalnode-hadoop02.out
hadoop03: starting journalnode, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-journalnode-hadoop03.out
19/06/08 16:35:33 WARN util.NativeCodeLoader: Unable to load native-hadoop library for your platform... using builtin-java classes where applicable
Starting ZK Failover Controllers on NN hosts [hadoop02 hadoop03]
hadoop02: starting zkfc, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-zkfc-hadoop02.out
hadoop03: starting zkfc, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/hadoop-root-zkfc-hadoop03.out
starting yarn daemons
starting resourcemanager, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/yarn-root-resourcemanager-hadoop02.out
hadoop04: starting nodemanager, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/yarn-root-nodemanager-hadoop04.out
hadoop03: starting nodemanager, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/yarn-root-nodemanager-hadoop03.out
hadoop02: starting nodemanager, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/yarn-root-nodemanager-hadoop02.out
[root@hadoop02 hadoop]# jps
5920 Jps
1154 QuorumPeerMain
5414 JournalNode
5243 DataNode
5597 DFSZKFailoverController
5790 NodeManager
5151 NameNode
[root@hadoop03 hadoop]# start-yarn.sh
starting yarn daemons
starting resourcemanager, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/yarn-root-resourcemanager-hadoop03.out
hadoop02: nodemanager running as process 5790. Stop it first.
hadoop04: nodemanager running as process 3078. Stop it first.
hadoop03: nodemanager running as process 4095. Stop it first.
[root@hadoop03 hadoop]# jps
3906 JournalNode
3827 DataNode
3765 NameNode
4406 Jps
4024 DFSZKFailoverController
1146 QuorumPeerMain
4267 ResourceManager
4095 NodeManager
[root@hadoop04 hadoop]# start-yarn.sh
starting yarn daemons
starting resourcemanager, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/yarn-root-resourcemanager-hadoop04.out
hadoop02: nodemanager running as process 5790. Stop it first.
hadoop04: nodemanager running as process 3078. Stop it first.
hadoop03: nodemanager running as process 4095. Stop it first.
[root@hadoop04 hadoop]# jps
2993 JournalNode
2914 DataNode
3078 NodeManager
3256 ResourceManager
1146 QuorumPeerMain
3567 Jps
[root@hadoop04 hadoop]# mr-jobhistory-daemon.sh start historyserver
starting historyserver, logging to /opt/cdh/hadoop-2.5.0-cdh5.3.6/logs/mapred-root-historyserver-hadoop04.out
[root@hadoop04 hadoop]# jps
2993 JournalNode
2914 DataNode
3605 JobHistoryServer
3078 NodeManager
3638 Jps
3256 ResourceManager
1146 QuorumPeerMain
Final daemon layout with ResourceManager HA enabled:

| hadoop02 | hadoop03 | hadoop04 |
|---|---|---|
| NameNode | NameNode | ResourceManager |
| ZKFC | ZKFC | DataNode |
| DataNode | DataNode | JournalNode |
| JournalNode | JournalNode | NodeManager |
| NodeManager | NodeManager | Zookeeper |
| Zookeeper | Zookeeper | historyserver |
| | ResourceManager | |
Check the ResourceManager states
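Analogous to hdfs haadmin, YARN ships an admin command for this (a short sketch; rm1 is hadoop03 and rm2 is hadoop04, per yarn-site.xml):
# Query the state of each ResourceManager
yarn rmadmin -getServiceState rm1     # e.g. active
yarn rmadmin -getServiceState rm2     # e.g. standby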