环境:
VMware 7实例:

| HOST    | System     | Memory | CPU    |
|---------|------------|--------|--------|
| hadoop1 | Ubuntu 64  | 1G     | 2 Core |
| hadoop2 | Ubuntu 64  | 1G     | 2 Core |
| hadoop3 | Ubuntu 64  | 2G     | 2 Core |
| hadoop4 | Ubuntu 64  | 2G     | 2 Core |
| hadoop5 | Centos7 64 | 2G     | 2 Core |
| hadoop6 | Centos7 64 | 2G     | 2 Core |
| hadoop7 | Centos7 64 | 2G     | 2 Core |
集群搭建:

| Host    | NN | DN | ZK | ZKFC | JN | RM | DM |
|---------|----|----|----|------|----|----|----|
| hadoop1 | 1  |    |    | 1    |    | 1  |    |
| hadoop2 | 1  |    | 1  | 1    |    |    |    |
| hadoop3 |    | 1  |    |      | 1  |    | 1  |
| hadoop4 |    | 1  | 1  |      |    |    | 1  |
| hadoop5 |    | 1  |    |      | 1  |    | 1  |
| hadoop6 |    | 1  | 1  |      |    |    | 1  |
| hadoop7 |    | 1  |    |      | 1  |    | 1  |
参数配置:
hdfs-site.xml:
<configuration>
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>hadoop1:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>hadoop2:8020</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn1</name>
<value>hadoop1:50070</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn2</name>
<value>hadoop2:50070</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://hadoop3:8485;hadoop5:8485;hadoop7:8485/mycluster</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_dsa</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>${hadoop.tmp.dir}/journal/data</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.blocksize</name>
<value>67108864</value>
</property>
</configuration>
core-site.xml:
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/tmp</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>hadoop2:2181,hadoop4:2181,hadoop6:2181</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
</configuration>
mapred-site.xml:
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
yarn-site.xml
<configuration>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hadoop1</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
zookeeper
conf/zoo.cfg
# The number of milliseconds of each tick
tickTime=2000
# The number of ticks that the initial
# synchronization phase can take
initLimit=10
# The number of ticks that can pass between
# sending a request and getting an acknowledgement
syncLimit=5
# the directory where the snapshot is stored.
# do not use /tmp for storage, /tmp here is just
# example sakes.
dataDir=/home/hadoop/tmp/zookeeper
# the port at which the clients will connect
clientPort=2181
# the maximum number of client connections.
# increase this if you need to handle more clients
#maxClientCnxns=60
#
# Be sure to read the maintenance section of the
# administrator guide before turning on autopurge.
#
# http://zookeeper.apache.org/doc/current/zookeeperAdmin.html#sc_maintenance
#
# The number of snapshots to retain in dataDir
#autopurge.snapRetainCount=3
# Purge task interval in hours
# Set to "0" to disable auto purge feature
#autopurge.purgeInterval=1
server.1=hadoop2:2888:3888
server.2=hadoop4:2888:3888
server.3=hadoop6:2888:3888
分别在对应server机器的dataDir下创建myid文件,内容为对应server.X中的ID(X为1、2、3):
echo X > /home/hadoop/tmp/zookeeper/myid
启动步骤:
1 分别在不同机器上启动zookeeper server
./zkServer.sh start
2 分别在机器上启动journalnode
sbin/hadoop-daemon.sh start journalnode
3 在其中一台namenode上格式化
bin/hdfs namenode -format
4 启动格式化的namenode
sbin/hadoop-daemon.sh start namenode
5 同步初始化另一台namenode
bin/hdfs namenode -bootstrapStandby
6 格式化zookeeper,在其中一台namenode机器上执行
bin/hdfs zkfc -formatZK
7 停止所有的服务,除zookeeper外
sbin/stop-dfs.sh
8 启动hdfs
sbin/start-dfs.sh