CentOS 6.6 i386 DVD ; basic server installation ; static IP not enabled ; IPv6 not disabled
VMware network mode : NAT ; subnet : 192.168.5.0 ; IPs start at 192.168.5.141
hostname : vmhost-1 ; root password : 123456
rm ~/* -f ; delete leftover logs in the home dir
vi /boot/grub/grub.conf ; delete the two words 'rhgb quiet'
vi /etc/sysconfig/network-scripts/ifcfg-eth0 ; ONBOOT=yes
service network start
yum install -y ntpdate
ntpdate cn.pool.ntp.org
rpm -qa | grep java
rpm -qa | grep java | awk '{system("rpm -e "$1)}'
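a simpler equivalent without the awk system() quirks (same effect, run as root) :
rpm -qa | grep java | xargs -r rpm -e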
useradd hadoop
passwd hadoop ; password : hadoop (typed twice)
logout
login : hadoop ; password : hadoop
mkdir download software ; run in the home dir
su - root ; 123456
vi /etc/sudoers ; find the root line (/root), duplicate it (yy p), change root to hadoop ; save with :wq!
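the resulting entries should look like this (the root line is the stock CentOS 6 one) :
root    ALL=(ALL)       ALL
hadoop  ALL=(ALL)       ALL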
vi /etc/hosts ; adjust to the real IPs
192.168.5.141 vmhost-1
192.168.5.142 vmhost-2
192.168.5.143 vmhost-3
192.168.5.144 vmhost-4
exit
sudo /etc/init.d/sshd start ; sudo password : hadoop
cd /etc/yum.repos.d/
sudo wget http://download.opensuse.org/repositories/home:Strahlex/CentOS_CentOS-6/home:Strahlex.repo
sudo yum install -y sshpass
SecureCRT ; open an SFTP session ; cd download ; put path/to/hadoop, jdk, and zookeeper packages
sudo rpm -i jdk-7u71-linux-i586.rpm ; sudo password : hadoop
tar -zxvf ~/download/hadoop-2.6.0.tar.gz -C ~/software
tar -zxvf ~/download/zookeeper-3.4.6.tar.gz -C ~/software
cp /usr/share/vim/vim72/vimrc_example.vim ~/.vimrc
vi ~/.vimrc ; set nu ; set tabstop=4 ; set nobackup
rm ~/.vimrc~
vi ~/.bashrc
export JAVA_HOME=/usr/java/jdk1.7.0_71
export HADOOP_HOME=/home/hadoop/software/hadoop-2.6.0
export ZOOKEEPER_HOME=/home/hadoop/software/zookeeper-3.4.6
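later steps call zkServer.sh, hadoop-daemon.sh, start-yarn.sh etc. by bare name, so the bin/sbin dirs presumably also go on PATH (an assumed line, not recorded in the original notes) :
export PATH=$JAVA_HOME/bin:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$ZOOKEEPER_HOME/bin:$PATH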
source ~/.bashrc
mkdir zookeeper-data ; in home dir
cd ~/zookeeper-data ; mkdir data ; mkdir log
mkdir hadoop-data ; in home dir
cd ~/hadoop-data ; mkdir tmp ; mkdir hdfs ; cd hdfs ; mkdir name data journal
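equivalently, the whole tree in two commands (same layout) :
mkdir -p ~/zookeeper-data/{data,log}
mkdir -p ~/hadoop-data/{tmp,hdfs/{name,data,journal}}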
cd ~/software/zookeeper-3.4.6/conf
cp zoo_sample.cfg zoo.cfg ; vi zoo.cfg
dataDir=/home/hadoop/zookeeper-data/data ; must point at the dir holding myid (the sample file sets /tmp/zookeeper)
dataLogDir=/home/hadoop/zookeeper-data/log
server.1=vmhost-1:2888:3888
server.2=vmhost-2:2888:3888
server.3=vmhost-3:2888:3888
vi ~/zookeeper-data/data/myid ; content : 1 ; save with :wq
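equivalent without vi :
echo 1 > ~/zookeeper-data/data/myid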
cd ~/software/hadoop-2.6.0/etc/hadoop
vi slaves
vmhost-1
vmhost-2
vmhost-3
vmhost-4
cp core-site.xml core-site.xml-bak
cp hdfs-site.xml hdfs-site.xml-bak
cp yarn-site.xml yarn-site.xml-bak
SecureCRT ; open an SFTP session ; cd software/hadoop-2.6.0/etc/hadoop/
put /path/to/xmls/*.xml
vi core-site.xml ; configure hadoop.tmp.dir and ha.zookeeper.quorum
vi hdfs-site.xml ; configure hostnames and dirs ; !!! Note : on vmhost-3 and vmhost-4, enable cluster2 after the VMs are cloned.
vi yarn-site.xml ; configure the hostname
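the real XML files were uploaded via SFTP above and are not reproduced in these notes ; below is a minimal sketch of the key properties, inferred from the hostnames, dirs, and commands used elsewhere in this document (the hadoop-cluster1/nn1 ids match the haadmin command further down ; everything else is an assumption) :
core-site.xml (sketch) :
    <property><name>fs.defaultFS</name><value>hdfs://hadoop-cluster1</value></property>
    <property><name>hadoop.tmp.dir</name><value>/home/hadoop/hadoop-data/tmp</value></property>
    <property><name>ha.zookeeper.quorum</name><value>vmhost-1:2181,vmhost-2:2181,vmhost-3:2181</value></property>
hdfs-site.xml (sketch ; cluster1 shown, hadoop-cluster2 mirrors it on vmhost-3/vmhost-4) :
    <property><name>dfs.nameservices</name><value>hadoop-cluster1,hadoop-cluster2</value></property>
    <property><name>dfs.ha.namenodes.hadoop-cluster1</name><value>nn1,nn2</value></property>
    <property><name>dfs.namenode.rpc-address.hadoop-cluster1.nn1</name><value>vmhost-1:9000</value></property>
    <property><name>dfs.namenode.rpc-address.hadoop-cluster1.nn2</name><value>vmhost-2:9000</value></property>
    <property><name>dfs.namenode.name.dir</name><value>/home/hadoop/hadoop-data/hdfs/name</value></property>
    <property><name>dfs.datanode.data.dir</name><value>/home/hadoop/hadoop-data/hdfs/data</value></property>
    <property><name>dfs.namenode.shared.edits.dir</name><value>qjournal://vmhost-2:8485;vmhost-3:8485;vmhost-4:8485/hadoop-cluster1</value></property>
    <property><name>dfs.journalnode.edits.dir</name><value>/home/hadoop/hadoop-data/hdfs/journal</value></property>
    <property><name>dfs.ha.automatic-failover.enabled</name><value>true</value></property>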
clone virtual machine 1 to create 2, 3, 4
VMs 1, 2, 3, 4 must always be started in this order
# on each of 2, 3, 4 , in order
sudo vi /etc/sysconfig/network ; change HOSTNAME
sudo vi /etc/udev/rules.d/70-persistent-net.rules ; remove the stale eth0 entry and rename the cloned NIC to eth0
sudo vi /etc/sysconfig/network-scripts/ifcfg-eth0 ; update HWADDR to the new MAC
vi ~/zookeeper-data/data/myid ; set to 2 / 3 / 4 respectively ; :wq
reboot
since static IPs were not configured, the machines must always be started in order so DHCP hands out the expected addresses.
# on each of 1, 2, 3, 4 , in order
ssh-keygen -t rsa -C mail-(1,2,3,4)@dean.org ; press Enter three times (defaults, empty passphrase)
once this is done on all of 1, 2, 3, 4 :
ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop@vmhost-1
# 1
scp ~/.ssh/authorized_keys hadoop@vmhost-2:/home/hadoop/.ssh/
scp ~/.ssh/authorized_keys hadoop@vmhost-3:/home/hadoop/.ssh/
scp ~/.ssh/authorized_keys hadoop@vmhost-4:/home/hadoop/.ssh/
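if these copies prompt for passwords, the sshpass installed earlier can drive them non-interactively (a sketch, assuming the password 'hadoop' and that host keys were already accepted) :
for h in vmhost-1 vmhost-2 vmhost-3 vmhost-4 ; do sshpass -p hadoop ssh-copy-id -i ~/.ssh/id_rsa.pub hadoop@$h ; done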
# on each of 1, 2, 3, 4 ; ssh into every other machine once (to accept host keys)
ssh vmhost-(1,2,3,4)
ssh 192.168.5.14(1,2,3,4)
# 1, 2, 3, 4 every
sudo iptables -F
sudo chkconfig --level 35 iptables off
# everything below is executed from the home dir
# 1, 2, 3 ; zookeeper.out will appear in the home dir
zkServer.sh start
zkServer.sh status
# 1
zkCli.sh or zkCli.sh -server vmhost-1:2181
# 1, 3
hdfs zkfc -formatZK
# 1
zkCli.sh -server vmhost-1:2181 ; ls /
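expected (hedged) : if formatZK succeeded on both clusters, ls / should show a hadoop-ha znode (with one child per nameservice) alongside the default zookeeper znode.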
# 2, 3, 4
hadoop-daemon.sh start journalnode
or, run once from one node : hadoop-daemons.sh --hosts allnodehosts start journalnode (allnodehosts : a hosts file in the conf dir)
# 1
hdfs namenode -format -clusterId Federation
hadoop-daemon.sh start namenode
# 2
hdfs namenode -bootstrapStandby
hadoop-daemon.sh start namenode
# 3
hdfs namenode -format -clusterId Federation
hadoop-daemon.sh start namenode
# 4
hdfs namenode -bootstrapStandby
hadoop-daemon.sh start namenode
# 1, 2, 3, 4
hadoop-daemon.sh start datanode
or, run once from one node : hadoop-daemons.sh start datanode (uses the slaves file)
# 1, 2, 3, 4
hadoop-daemon.sh start zkfc
# 1, 3
start-yarn.sh
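a quick sanity check (passwordless ssh was set up above) ; compare the output against the daemon table at the end of these notes :
for h in vmhost-1 vmhost-2 vmhost-3 vmhost-4 ; do echo $h ; ssh $h jps ; done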
-----------------------some useful link show status------------------------------
http://192.168.5.141:50070/dfshealth.html
http://192.168.5.142:50070/dfshealth.html
http://192.168.5.143:50070/dfshealth.html
http://192.168.5.144:50070/dfshealth.html
http://192.168.5.141:8042/node
http://192.168.5.143:8042/node
http://192.168.5.143:8088/cluster
-------------------------------administration command-----------------------------------------------
hadoop fs -ls /
hdfs haadmin -ns hadoop-cluster1 -transitionToActive nn1   ?? (untested)
hdfs dfsadmin -printTopology   ?? (untested)
hadoop fs -mkdir hdfs://192.168.1.201:9000/cluster-01   ?? (untested)
hadoop fs -mkdir hdfs://192.168.1.203:9000/cluster-02   ?? (untested)
hadoop fs -mkdir hdfs://192.168.1.201:9000/user   ?? (untested)
------------------not done yet-----------------!!!----------------
yarn-daemon.sh start nodemanager
yarn-daemon.sh start resourcemanager
yarn-daemon.sh start proxyserver
mr-jobhistory-daemon.sh start historyserver
$ hostname # check the hostname
cluster1
$ sbin/hadoop-daemon.sh --script hdfs start namenode # start the namenode
$ sbin/hadoop-daemon.sh --script hdfs start datanode # start the datanode
$ sbin/yarn-daemon.sh start nodemanager # start the nodemanager
$ sbin/yarn-daemon.sh start resourcemanager # start the resourcemanager
$ sbin/yarn-daemon.sh start proxyserver # start the web app proxy ; similar in role to the jobtracker. If yarn.web-proxy.address is not set in yarn-site.xml, or is set to the same host:port as the resourcemanager, Hadoop runs the proxyserver inside the resourcemanager by default (sharing host:port).
$ sbin/mr-jobhistory-daemon.sh start historyserver # self-explanatory
$ ssh cluster2 # log in to cluster2
$ hostname # check the hostname : cluster2
$ sbin/yarn-daemon.sh start nodemanager # start the nodemanager
$ sbin/hadoop-daemon.sh --script hdfs start datanode # start the datanode
$ ssh cluster3 # log in to cluster3 ... # cluster3 and cluster4 start the same way as cluster2.
--
$ sbin/hadoop-daemons.sh --hosts namenodehosts --script hdfs start namenode
$ sbin/hadoop-daemons.sh --hosts datanodehosts --script hdfs start datanode
$ sbin/yarn-daemons.sh --hostnames cluster1 start resourcemanager
$ sbin/yarn-daemons.sh --hosts allnodehosts start nodemanager
$ sbin/yarn-daemons.sh --hostnames cluster1 start proxyserver
$ sbin/mr-jobhistory-daemon.sh start historyserver
---------------clean up when zookeeper fails to start------------------------------------
# clean up ; $PWD is the home dir
rm -f zookeeper.out
rm -rf zookeeper-data/data/version-2/ zookeeper-data/data/zookeeper_server.pid
rm -rf zookeeper-data/log/version-2
----------compile hadoop from src------can be done before cloning the VMs--------------------------------------
sudo yum install -y gcc-c++ ; needed to compile protobuf
sudo yum install -y gcc cmake zlib-devel openssl-devel ; needed to compile hadoop
get maven.tar.gz , put it in ~/download dir
tar -zxvf ~/download/maven.tar.gz -C ~/software
vi ~/.bashrc ; add M2_HOME ; add M2_HOME/bin to PATH ; source ~/.bashrc
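the exact Maven dir name isn't recorded ; assuming it extracts as apache-maven-3.x.y, the .bashrc lines would be :
export M2_HOME=/home/hadoop/software/apache-maven-3.x.y   # adjust to the real extracted dir name
export PATH=$M2_HOME/bin:$PATH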
get protobuf.tar.gz , put it in ~/download dir
tar -zxvf ~/download/protobuf-xxx.tar.gz -C ~/software
cd ~/software/protobuf-xxx
./configure ; make ; make check ; sudo make install
get hadoop-xx.src.tar.gz , put it in ~/download dir
mkdir ~/sourceCode
tar -zxvf ~/download/hadoop-xx.src.tar.gz -C ~/sourceCode
cd ~/sourceCode/hadoop-xxx-src
mvn package -Pdist,native -DskipTests -Dtar
cd hadoop-dist/target/hadoop-2.6.0/lib/native/
cp -f * /home/hadoop/software/hadoop-2.6.0/lib/native/
scp * hadoop@vmhost-(2,3,4):/home/hadoop/software/hadoop-2.6.0/lib/native/
!! result : the 'execstack -c <lib path>' warning no longer appears, but some commands become slower
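to check whether the rebuilt native libraries are actually being loaded :
hadoop checknative -a   # the native hadoop library should be reported as true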
-------------------some common software package-----------------
sudo yum install -y tree
--------------------statistics : daemon processes on the four virtual machines--------------------------
1: |QuorumPeerMain |ResourceManager |DFSZKFailoverController |DataNode |NameNode |NodeManager |
2: |QuorumPeerMain |                |DFSZKFailoverController |DataNode |NameNode |NodeManager |JournalNode
3: |QuorumPeerMain |ResourceManager |DFSZKFailoverController |DataNode |NameNode |NodeManager |JournalNode
4: |               |                |DFSZKFailoverController |DataNode |NameNode |NodeManager |JournalNode
The config files on vmhost-1 were modified ; keeping a backup copy is recommended.
The demo Hadoop config files are here.
A detailed write-up of the setup process will be added later ; apologies for the terseness.