一步一步安装hadoop2.7.1 HA
Hadoop HA高可用架构图:
3台虚拟机节点分布:
--不用图形界面
[root@node1 ~]# vi /etc/inittab
id:3:initdefault:
--linux基本设置
ifconfig
192.168.88.128
192.168.88.129
192.168.88.130
hostname node1
hostname node2
hostname node3
vi /etc/hosts
192.168.88.128 node1
192.168.88.129 node2
192.168.88.130 node3
vi /etc/sysconfig/network
HOSTNAME=node1
vi /etc/sysconfig/network-scripts/ifcfg-eth0
IPADDR=192.168.88.128
/sbin/service network restart #重新启动网络服务
service iptables stop
chkconfig iptables off
chkconfig|grep iptables
iptables 0:off 1:off 2:off 3:off 4:off 5:off 6:off
reboot之后主机名生效
--配置免密码
node1,2,3中执行:
su - hadoop
ssh-keygen -q -t rsa -N "" -f /home/hadoop/.ssh/id_rsa
cd .ssh
cat id_rsa.pub >> authorized_keys
chmod go-wx authorized_keys
node1中执行:
scp id_rsa.pub hadoop@node2:~
scp id_rsa.pub hadoop@node3:~
node2和node3中执行:
cat ~/id_rsa.pub>>~/.ssh/authorized_keys
cat ~/id_rsa.pub>>~/.ssh/authorized_keys
node1中执行:
ssh node2
ssh node3
------------------------------------------------------------
--安装jdk
用winscp上传jdk
[hadoop@node1 ~]$ tar -zxvf jdk-7u67-linux-x64.tar.gz
[hadoop@node1 bin]$ pwd
/home/hadoop/jdk1.7.0_67/bin
--这步可以跳过,后面统一弄
[hadoop@node1 bin]$ su - root
[root@node1 ~]# vi /etc/profile
export JAVA_HOME=/home/hadoop/jdk1.7.0_67
export PATH=${JAVA_HOME}/bin:$PATH
unset i
unset -f pathmunge
[root@node1 ~]# source /etc/profile
------------------------------------------------------------
--安装hadoop
[hadoop@node1 ~]$ tar -zxvf hadoop-2.7.1.tar.gz
/home/hadoop/hadoop-2.7.1
--加入环境变量
/home/hadoop/hadoop-2.7.1/sbin
/home/hadoop/hadoop-2.7.1/bin
--data和jn目录
[hadoop@node1 ~]$ mkdir -p /home/hadoop/hadoop-2.7.1/data
[hadoop@node1 ~]$ mkdir -p /home/hadoop/hadoop-2.7.1/data/jn/
[hadoop@node1 ~]$ mkdir /home/hadoop/dirtmp --存放pid(与后面hadoop-env.sh等配置中的HADOOP_PID_DIR目录保持一致)
--修改配置文件
cd etc/hadoop
vi hadoop-env.sh 修改JAVA_HOME等
vi hdfs-site.xml
vi core-site.xml
vi slaves
node1
node2
node3
[hadoop@node2 hadoop]$ ll *env*sh
-rw-r--r--. 1 hadoop hadoop 4236 Nov 5 17:12 hadoop-env.sh
-rw-r--r--. 1 hadoop hadoop 1449 Nov 5 17:12 httpfs-env.sh
-rw-r--r--. 1 hadoop hadoop 1527 Nov 5 17:12 kms-env.sh
-rw-r--r--. 1 hadoop hadoop 1383 Nov 5 17:12 mapred-env.sh
-rw-r--r--. 1 hadoop hadoop 4567 Nov 5 17:12 yarn-env.sh
--覆盖hadoop配置文件
scp /home/hadoop/hadoop-2.7.1/etc/hadoop/* hadoop@node2:/home/hadoop/hadoop-2.7.1/etc/hadoop/
scp /home/hadoop/hadoop-2.7.1/etc/hadoop/* hadoop@node3:/home/hadoop/hadoop-2.7.1/etc/hadoop/
------------------------------------------------------------
--配置zk
[hadoop@node1 ~]$ tar -zxvf zookeeper-3.4.6.tar.gz
--加入环境变量
/home/hadoop/zookeeper-3.4.6/bin
--安装目录下建立数据文件夹
[hadoop@node1 zookeeper-3.4.6]$ mkdir data
cd conf
cp zoo_sample.cfg zoo.cfg
vi zoo.cfg --修改dataDir=/home/hadoop/zookeeper-3.4.6/data
最后加上额外3行
server.1=node1:2888:3888
server.2=node2:2888:3888
server.3=node3:2888:3888
cd data
vi myid
1
scp -r zookeeper-3.4.6/ hadoop@node2:~ --然后在node2上执行: vi /home/hadoop/zookeeper-3.4.6/data/myid 改为 2
scp -r zookeeper-3.4.6/ hadoop@node3:~ --然后在node3上执行: vi /home/hadoop/zookeeper-3.4.6/data/myid 改为 3
scp同步下其他节点的配置文件
------------------------------------------------------------
配置环境变量,各个节点 root用户
[hadoop@node1 bin]$ su - root
[root@node1 ~]# vi /etc/profile
export JAVA_HOME=/home/hadoop/jdk1.7.0_67
export HADOOP_HOME=/home/hadoop/hadoop-2.7.1
export ZK_HOME=/home/hadoop/zookeeper-3.4.6
export PATH=$PATH:${JAVA_HOME}/bin:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:${ZK_HOME}/bin
unset i
unset -f pathmunge
[root@node1 ~]# source /etc/profile
------------------------------------------------------------
--启动3个zk
su - hadoop
zkServer.sh start
jps
zkServer.sh stop --.out文件增长很快
------------------------------------------------------------
--启动3个jn
cd sbin
hadoop-daemon.sh start journalnode
jps
------------------------------------------------------------
--格式化NN1
hdfs namenode -format
--启动NN1
hadoop-daemon.sh start namenode
--格式化NN2
hdfs namenode -bootstrapStandby --复制元数据到NN2
--启动NN2
hadoop-daemon.sh start namenode --看能否启动起来
--格式化zk
hdfs zkfc -formatZK --其中一个NN中执行
--启动zkfc(ZKFailoverController,不是ZooKeeper本身)
hadoop-daemon.sh start zkfc
--重启hadoop
NN1:
stop-dfs.sh
start-dfs.sh
--配置ip映射,hosts文件
C:\WINDOWS\system32\drivers\etc\hosts
192.168.88.128 node1
192.168.88.129 node2
192.168.88.130 node3
--谁先抢到锁,谁就是active
--监控界面
http://node1:50070
http://node2:50070
--测试上传文件
./hdfs dfs -mkdir -p /usr/file
./hdfs dfs -put /xxx/xxx.txt /usr/file
-------------------------------------------------------------
配置Map/Reduce
vi mapred-site.xml
vi yarn-site.xml
-------------------------------------------------------------------------------------------
全面启动
--所有节点
zkServer.sh start
zkServer.sh stop
--node1
start-all.sh
stop-all.sh
--监控界面
http://node1:8088
zkServer.sh start
start-all.sh
--检查zkfc是否成功起了
hadoop-daemon.sh start zkfc
--单独启动每一个步骤:
zkServer.sh start
hadoop-daemon.sh start namenode
hadoop-daemon.sh start datanode
hadoop-daemon.sh start journalnode
hadoop-daemon.sh start zkfc
yarn-daemon.sh start resourcemanager
yarn-daemon.sh start nodemanager
---------------------------------------------------------------
--附配置:
---------------------------------------------------------------
[hadoop@node1 conf]$ vi zoo.cfg
clientPort=2181 --默认
dataDir=/home/hadoop/zookeeper-3.4.6/data
server.1=node1:2888:3888
server.2=node2:2888:3888
server.3=node3:2888:3888
[hadoop@node1 hadoop]$ vi hadoop-env.sh
export JAVA_HOME=/home/hadoop/jdk1.7.0_67
export HADOOP_PID_DIR=/home/hadoop/dirtmp
export HADOOP_SECURE_DN_PID_DIR=/home/hadoop/dirtmp
vi mapred-env.sh
export HADOOP_MAPRED_PID_DIR=/home/hadoop/dirtmp
vi yarn-env.sh
export YARN_PID_DIR=/home/hadoop/dirtmp
vi hdfs-site.xml
<property>
<name>dfs.nameservices</name>
<value>odscluster</value>
</property>
<property>
<name>dfs.ha.namenodes.odscluster</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.odscluster.nn1</name>
<value>node1:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.odscluster.nn2</name>
<value>node2:8020</value>
</property>
<property>
<name>dfs.namenode.http-address.odscluster.nn1</name>
<value>node1:50070</value>
</property>
<property>
<name>dfs.namenode.http-address.odscluster.nn2</name>
<value>node2:50070</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://node1:8485;node2:8485;node3:8485/odscluster</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.odscluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/home/hadoop/hadoop-2.7.1/data/jn</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
vi core-site.xml
<property>
<name>fs.defaultFS</name>
<value>hdfs://odscluster</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>node1:2181,node2:2181,node3:2181</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/hadoop-2.7.1/data</value>
</property>
vi mapred-site.xml
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
vi yarn-site.xml
<property>
<name>yarn.resourcemanager.hostname</name>
<value>node1</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>