大数据Hadoop–全分布,HA搭建
一.全分布式环境SSH免密登录以及时间同步设置
1.集群时间同步设置
(1)模拟内网环境:在集群中找一台服务器作为时间服务器,
例如hadoop02作为时间服务器,hadoop03,hadoop04同步hadoop02。
(2)查看Linux中的ntpd时间服务(只需开启时间服务器的ntpd服务):
#查看ntpd的状态
[root@hadoop02 proc]# service ntpd status
ntpd (pid 964) is running...
#将ntpd设置为开机自启动(时间服务器)
[root@hadoop02 proc]# chkconfig ntpd on
[root@hadoop02 proc]# chkconfig | grep ntpd
ntpd 0:off 1:off 2:on 3:on 4:on 5:on 6:off
ntpdate 0:off 1:off 2:off 3:off 4:off 5:off 6:off
(3)修改系统配置
[root@hadoop02 proc]# vi /etc/ntp.conf
#第一处:修改为自己的网段
# Hosts on local network are less restricted.
restrict 192.168.159.0 mask 255.255.255.0 nomodify notrap
#第二处:内网环境不用添加服务,注释掉。
# Use public servers from the pool.ntp.org project.
# Please consider joining the pool (http://www.pool.ntp.org/join.html).
#server 0.centos.pool.ntp.org iburst
#server 1.centos.pool.ntp.org iburst
#server 2.centos.pool.ntp.org iburst
#server 3.centos.pool.ntp.org iburst
#第三处:添加启动本地服务
server 127.127.1.0
fudge 127.127.1.0 stratum 10
#重启ntpd服务
[root@hadoop02 proc]# service ntpd restart
Shutting down ntpd: [ OK ]
Starting ntpd: [ OK ]
(4)同步时间服务器
时间服务器同步国家授时中心服务器
[root@hadoop02 proc]# ntpdate -u ntp.sjtu.edu.cn
8 Jun 08:17:24 ntpdate[1148]: adjust time server 120.25.115.20 offset -0.001159 sec
从节点同步时间服务器
[root@hadoop03 proc]# service ntpd stop
Shutting down ntpd: [ OK ]
[root@hadoop03 proc]# ntpdate hadoop02
8 Jun 08:21:18 ntpdate[1137]: adjust time server 192.168.159.122 offset 0.005546 sec
[root@hadoop04 proc]# service ntpd stop
Shutting down ntpd: [ OK ]
[root@hadoop04 proc]# ntpdate hadoop02
8 Jun 08:21:18 ntpdate[1137]: adjust time server 192.168.159.122 offset -0.003125 sec
(5)设置从服务器定时同步时间服务器
[root@hadoop03 proc]# crontab -e
no crontab for root - using an empty one
crontab: installing new crontab
#synchronize time with time server
0-59/10 * * * * /usr/sbin/ntpdate hadoop02
二.hadoop集群环境部署
全分布服务节点部署规划
| hadoop02 | hadoop03 | hadoop04 |
| --- | --- | --- |
| namenode | resourcemanager | secondarynamenode |
| datanode | datanode | datanode |
| nodemanager | nodemanager | nodemanager |
| historyserver | | |
1.清除原有日志文件(在原来配置过了的Hadoop的基础上修改)
[root@hadoop02 hadoop-2.7.3]# ls
bin etc include lib libexec LICENSE.txt logs native-2.7.3-snappy.tar.gz NOTICE.txt README.txt sbin share
[root@hadoop02 hadoop-2.7.3]# rm -rf logs/
[root@hadoop02 hadoop-2.7.3]# ls
bin etc include lib libexec LICENSE.txt native-2.7.3-snappy.tar.gz NOTICE.txt README.txt sbin share
2.修改配置文件
<!--vi core-site.xml-->
---------------------------------------------------------------------------------------------------
<configuration>
<!--namenode(主节点)访问入口-->
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop02:8020</value>
</property>
<!--配置metadata的存放位置-->
<property>
<name>hadoop.tmp.dir</name>
<value>/var/data/hadoop/full</value>
</property>
</configuration>
<!--vi hdfs-site.xml-->
---------------------------------------------------------------------------------------------------
<configuration>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hadoop04:50090</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
<!--配置静态用户-->
<property>
<name>hadoop.http.staticuser.user</name>
<value>root</value>
</property>
</configuration>
#vi slaves
#-------------------------------------------------------------------------------------------------#
hadoop02
hadoop03
hadoop04
<!--vi mapred-site.xml-->
---------------------------------------------------------------------------------------------------
<configuration>
<property>
<!--用于执行MapReduce作业的运行时框架。可以是local,classic或yarn。-->
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!--以下端口号不要随意改动-->
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop04:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop04:19888</value>
</property>
</configuration>
<!--vi yarn-site.xml-->
---------------------------------------------------------------------------------------------------
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<!--指定mapreduce的时候使用shuffle-->