Prerequisites: the hadoop-2.7.1.tar.gz tarball, JDK 1.7, and a Linux environment.
1. Create a hadoop user to run the Hadoop cluster
useradd hadoop
passwd hadoop
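To confirm the account exists before continuing:
id hadoop   # should print the uid/gid of the new hadoop user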
2. Set up passwordless SSH trust between all machines. The master starts the DataNode and NodeManager processes on the slaves remotely over SSH; without mutual trust, every cluster start and stop prompts for passwords.
Run on each machine:
vim /etc/hosts
172.19.34.185 hadoop185
172.19.34.109 hadoop109
su hadoop
ssh-keygen -t rsa # generate a key pair for passwordless SSH login
ssh-copy-id -i hadoop185 # repeat for every node, e.g. ssh-copy-id -i hadoop109
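A quick way to confirm the trust works (run as the hadoop user; neither command should prompt for a password):
ssh hadoop185 'hostname'
ssh hadoop109 'hostname'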
3. Synchronize time across all machines
su root
/usr/sbin/ntpdate ntp.api.bz
crontab -e
*/5 * * * * /usr/sbin/ntpdate ntp.api.bz &>/dev/null
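To verify the clocks agree, compare local and remote time from the master (hadoop109 is the slave defined in /etc/hosts above):
date; ssh hadoop109 date   # the two timestamps should match to within a second or so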
4. Configure the Hadoop master
tar xf hadoop-2.7.1.tar.gz -C /usr/local/
chown -R hadoop:hadoop /usr/local/hadoop-2.7.1
mv /usr/local/hadoop-2.7.1 /usr/local/hadoop
su hadoop
vim /usr/local/hadoop/etc/hadoop/hadoop-env.sh
# Set JAVA_HOME explicitly: change the line `export JAVA_HOME=${JAVA_HOME}` to the actual JDK path; you can find it with echo ${JAVA_HOME}
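For example, if the JDK lives under /usr/java/jdk1.7.0_79 (a hypothetical path; substitute whatever echo ${JAVA_HOME} reports on your machines), the line becomes:
export JAVA_HOME=/usr/java/jdk1.7.0_79   # adjust to your actual JDK install path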
vim /usr/local/hadoop/etc/hadoop/core-site.xml
<configuration>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>/usr/local/hadoop/tmp</value>
    </property>
    <property>
        <!-- linux-3 is the master's hostname; every node must be able to resolve it -->
        <name>fs.default.name</name>
        <value>hdfs://linux-3:9000</value>
    </property>
</configuration>
# hadoop.tmp.dir defines Hadoop's temporary directory; the default is /tmp/hadoop-${username}. Many HDFS directories live under it by default, so the user running the Hadoop processes must have full access to it.
# fs.default.name defines the HDFS name node and the default filesystem. Its value is a URI: the address (a hostname is allowed) and port (8020 by default) that the NameNode's RPC server listens on. Its default value is file:///, i.e. the local filesystem. (In Hadoop 2.x this key is a deprecated alias for fs.defaultFS, but it still works.)
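Since the hadoop user needs full access to hadoop.tmp.dir, it is worth creating the directory up front rather than relying on it being created implicitly (it inherits hadoop's ownership because /usr/local/hadoop was chowned above):
mkdir -p /usr/local/hadoop/tmp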
vim /usr/local/hadoop/etc/hadoop/hdfs-site.xml
<configuration>
    <property>
        <!-- local path where the DataNode stores HDFS blocks; the default is ${hadoop.tmp.dir}/dfs/data (in 2.x this key is a deprecated alias for dfs.datanode.data.dir) -->
        <name>dfs.data.dir</name>
        <value>/usr/local/hadoop/data</value>
    </property>
    <property>
        <!-- number of block replicas to keep; the default is 3, set it according to the actual number of slaves -->
        <name>dfs.replication</name>
        <value>2</value>
    </property>
</configuration>
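As with the tmp directory, the block-storage path is easiest to create now; the empty directory travels along with the scp in step 5, so every DataNode ends up with it:
mkdir -p /usr/local/hadoop/data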
# the 2.7.1 tarball only ships a template for this file, so copy it first
cp /usr/local/hadoop/etc/hadoop/mapred-site.xml.template /usr/local/hadoop/etc/hadoop/mapred-site.xml
vim /usr/local/hadoop/etc/hadoop/mapred-site.xml
<configuration>
    <property>
        <!-- run MapReduce on the YARN framework -->
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>
vim /usr/local/hadoop/etc/hadoop/yarn-site.xml
<configuration>
    <!-- ResourceManager address: must be a hostname every node can resolve; here it is the master from /etc/hosts above -->
    <property>
        <name>yarn.resourcemanager.hostname</name>
        <value>hadoop185</value>
    </property>
    <!-- auxiliary service the NodeManager loads at startup: the shuffle server for MapReduce -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
</configuration>
vim /usr/local/hadoop/etc/hadoop/slaves # hosts that run DataNode and NodeManager; the master (hadoop185) also doubles as a worker here
hadoop185
hadoop109
5. scp the configured Hadoop directory to the other machines
scp -r /usr/local/hadoop hadoop@172.19.34.109:/usr/local/
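Note that this only succeeds if /usr/local on the target is writable by the hadoop user. If it is not, one workaround (assuming root SSH login is permitted) is to copy as root and fix the ownership afterwards:
scp -r /usr/local/hadoop root@172.19.34.109:/usr/local/
ssh root@172.19.34.109 'chown -R hadoop:hadoop /usr/local/hadoop'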
6. Format the NameNode from the bin directory and check for errors
cd /usr/local/hadoop/bin
./hadoop namenode -format # deprecated alias in 2.x; ./hdfs namenode -format is equivalent
# expected output (abridged):
15/07/17 09:37:08 INFO metrics.TopMetrics: NNTop conf: dfs.namenode.top.windows.minutes = 1,5,25
15/07/17 09:37:08 INFO namenode.FSNamesystem: Retry cache on namenode is enabled
15/07/17 09:37:08 INFO namenode.FSNamesystem: Retry cache will use 0.03 of total heap and retry cache entry expiry time is 600000 millis
15/07/17 09:37:08 INFO util.GSet: Computing capacity for map NameNodeRetryCache
15/07/17 09:37:08 INFO util.GSet: VM type = 64-bit
15/07/17 09:37:08 INFO util.GSet: 0.029999999329447746% max memory 888.9 MB = 273.1 KB
15/07/17 09:37:08 INFO util.GSet: capacity = 2^15 = 32768 entries
15/07/17 09:37:08 INFO namenode.FSImage: Allocated new BlockPoolId: BP-660215815-172.19.34.185-1437125828943
15/07/17 09:37:09 INFO common.Storage: Storage directory /usr/local/hadoop/tmp/dfs/name has been successfully formatted.
15/07/17 09:37:09 INFO namenode.NNStorageRetentionManager: Going to retain 1 images with txid >= 0
15/07/17 09:37:09 INFO util.ExitUtil: Exiting with status 0
15/07/17 09:37:09 INFO namenode.NameNode: SHUTDOWN_MSG:
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at linux-3/172.19.34.185
************************************************************/
# if it reports an error, adjust according to the specific message
# Start Hadoop (the start scripts live in sbin, not bin)
cd /usr/local/hadoop/sbin
# run on the master node; once it completes, run jps on both nodes to check service status
./start-all.sh
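start-all.sh still works in 2.7.1 but is deprecated; the equivalent split invocation, which the newer documentation prefers, is:
./start-dfs.sh    # NameNode, SecondaryNameNode and the DataNodes
./start-yarn.sh   # ResourceManager and the NodeManagers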
# on the master:
[hadoop@linux-3 sbin]$ jps
27709 NameNode
28628 Jps
28186 ResourceManager
28024 SecondaryNameNode
27845 DataNode
28297 NodeManager
# on the slave:
[hadoop@xxfw-tomcat sbin]$ jps
3825 NodeManager
3677 DataNode
4087 Jps
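To confirm that both DataNodes registered with the NameNode, run a report from the master:
/usr/local/hadoop/bin/hdfs dfsadmin -report   # should show 2 live datanodes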
http://NameNode:8088/   # YARN web UI (replace NameNode with the master's hostname or IP, e.g. 172.19.34.185)
http://NameNode:50070/  # HDFS web UI
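As a final smoke test, the examples jar bundled with the 2.7.1 tarball can run a small MapReduce job through YARN:
cd /usr/local/hadoop
bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar pi 2 10   # estimate pi with 2 maps, 10 samples each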