Preparation before installing the cluster
Disable the firewall and keep it from starting on boot
service iptables stop
chkconfig iptables off
Disable SELinux
vi /etc/selinux/config
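In that file, set the SELINUX line to disabled (a minimal sketch of the relevant line; the change fully takes effect after a reboot, while setenforce 0 switches to permissive mode immediately):
SELINUX=disabled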
Set up passwordless SSH login
ssh-keygen -t rsa
ssh-copy-id node01
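To log in to every node without a password, copy the key to the other nodes as well (a sketch assuming the node02/node03 hostnames used later in this post):
ssh-copy-id node02
ssh-copy-id node03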
Change the hostname
vi /etc/sysconfig/network
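In that file, the HOSTNAME line determines the machine's name (a sketch for the first node; use node02 and node03 on the other machines):
NETWORKING=yes
HOSTNAME=node01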
Map hostnames to IP addresses
vi /etc/hosts
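A minimal example of the mapping; 192.168.100.21 is the node01 address used later in this post, while the .22 and .23 addresses are assumptions, so adjust them to your own network:
192.168.100.21 node01
192.168.100.22 node02
192.168.100.23 node03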
Copy it to the other nodes: scp /etc/hosts node02:/etc/ (and likewise for node03)
Install the JDK
…
See my earlier post on installing the JDK:
https://mp.csdn.net/mdeditor/102639879#
Make sure you have at least three virtual machines, and complete all of the steps above on each of them.
Install the Hadoop cluster
Upload the tarball and extract it
[root@node01 ~]# mkdir -p /export/softwares /export/servers
[root@node01 ~]# cd /export/softwares/
[root@node01 softwares]# rz
[root@node01 softwares]# mv hadoop-2.6.0-cdh5.14.0-your-locally-compiled-build.tar.gz hadoop-2.6.0-cdh5.14.0.tar.gz
[root@node01 softwares]# cd /export/servers/
[root@node01 servers]# tar -zxvf /export/softwares/hadoop-2.6.0-cdh5.14.0.tar.gz
Check the compression codecs and native libraries supported by Hadoop
[root@node01 /]# cd /export/servers/hadoop-2.6.0-cdh5.14.0/bin/
[root@node01 bin]# ./hadoop checknative
If openssl shows up as false, install openssl-devel on all machines. Once the virtual machines have internet access, it can be installed online with the following command:
[root@node01 bin]# yum -y install openssl-devel
[root@node01 bin]# ./hadoop checknative
Modify the configuration files
Go into this directory to edit the configuration files:
cd /export/servers/hadoop-2.6.0-cdh5.14.0/etc/hadoop/
Edit core-site.xml
[root@node01 hadoop]# vim core-site.xml
<configuration>
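<!-- Default filesystem URI: the NameNode RPC address used by clients and the cluster -->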
<property>
<name>fs.defaultFS</name>
<value>hdfs://node01:8020</value>
</property>
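<!-- Base directory for Hadoop's temporary files -->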
<property>
<name>hadoop.tmp.dir</name>
<value>/export/servers/hadoop-2.6.0-cdh5.14.0/hadoopDatas/tempDatas</value>
</property>
<!-- Buffer size; in production, tune this according to server capacity -->
<property>
<name>io.file.buffer.size</name>
<value>4096</value>
</property>
<!-- Enable the HDFS trash so deleted data can be recovered from it; the unit is minutes (10080 minutes = 7 days) -->
<property>
<name>fs.trash.interval</name>
<value>10080</value>
</property>
</configuration>
Edit hdfs-site.xml
[root@node01 hadoop]# vim hdfs-site.xml
<configuration>
<!-- Path where the NameNode stores its metadata; in production, determine the disk mount points first and separate multiple directories with commas -->
<!-- Dynamic decommissioning and recommissioning of cluster nodes
<property>
<name>dfs.hosts</name>
<value>/export/servers/hadoop-2.6.0-cdh5.14.0/etc/hadoop/accept_host</value>
</property>
<property>
<name>dfs.hosts.exclude</name>
<value>/export/servers/hadoop-2.6.0-cdh5.14.0/etc/hadoop/deny_host</value>
</property>
-->
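<!-- SecondaryNameNode web address -->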
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>node01:50090</value>
</property>
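<!-- NameNode web UI address -->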
<property>
<name>dfs.namenode.http-address</name>
<value>node01:50070</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///export/servers/hadoop-2.6.0-cdh5.14.0/hadoopDatas/namenodeDatas</value>
</property>
<!-- Locations where the DataNode stores block data; in production, determine the disk mount points first and separate multiple directories with commas -->
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///export/servers/hadoop-2.6.0-cdh5.14.0/hadoopDatas/datanodeDatas</value>
</property>
<property>
<name>dfs.namenode.edits.dir</name>
<value>file:///export/servers/hadoop-2.6.0-cdh5.14.0/hadoopDatas/dfs/nn/edits</value>
</property>
<property>
<name>dfs.namenode.checkpoint.dir</name>
<value>file:///export/servers/hadoop-2.6.0-cdh5.14.0/hadoopDatas/dfs/snn/name</value>
</property>
<property>
<name>dfs.namenode.checkpoint.edits.dir</name>
<value>file:///export/servers/hadoop-2.6.0-cdh5.14.0/hadoopDatas/dfs/nn/snn/edits</value>
</property>
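<!-- Number of block replicas; 2 is enough for a small test cluster -->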
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
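<!-- Disable HDFS permission checking (convenient on a test cluster) -->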
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
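<!-- HDFS block size: 134217728 bytes = 128 MB -->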
<property>
<name>dfs.blocksize</name>
<value>134217728</value>
</property>
</configuration>
Edit hadoop-env.sh
No changes are needed here.
Edit mapred-site.xml
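If the directory only ships mapred-site.xml.template, you can copy it first (this is an assumption about your build; vim will also simply create a new file if it does not exist):
[root@node01 hadoop]# cp mapred-site.xml.template mapred-site.xml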
[root@node01 hadoop]# vim mapred-site.xml
<configuration>
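<!-- Run MapReduce jobs on YARN -->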
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
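<!-- Allow small jobs to run in a single JVM (uber mode) -->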
<property>
<name>mapreduce.job.ubertask.enable</name>
<value>true</value>
</property>
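<!-- JobHistory server RPC address -->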
<property>
<name>mapreduce.jobhistory.address</name>
<value>node01:10020</value>
</property>
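<!-- JobHistory server web UI address -->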
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>node01:19888</value>
</property>
</configuration>
Edit yarn-site.xml
[root@node01 hadoop]# vim yarn-site.xml
<configuration>
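<!-- Host that runs the ResourceManager -->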
<property>
<name>yarn.resourcemanager.hostname</name>
<value>node01</value>
</property>
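<!-- Auxiliary shuffle service required by MapReduce on YARN -->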
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
Edit the slaves file
[root@node01 hadoop]# vim slaves
node01
node02
node03
Create the data storage directories
mkdir -p /export/servers/hadoop-2.6.0-cdh5.14.0/hadoopDatas/tempDatas
mkdir -p /export/servers/hadoop-2.6.0-cdh5.14.0/hadoopDatas/namenodeDatas
mkdir -p /export/servers/hadoop-2.6.0-cdh5.14.0/hadoopDatas/datanodeDatas
mkdir -p /export/servers/hadoop-2.6.0-cdh5.14.0/hadoopDatas/dfs/nn/edits
mkdir -p /export/servers/hadoop-2.6.0-cdh5.14.0/hadoopDatas/dfs/snn/name
mkdir -p /export/servers/hadoop-2.6.0-cdh5.14.0/hadoopDatas/dfs/nn/snn/edits
Distribute the installation to the other nodes
[root@node01 hadoopDatas]# cd /export/servers/
[root@node01 servers]# scp -r hadoop-2.6.0-cdh5.14.0/ node02:$PWD
[root@node01 servers]# scp -r hadoop-2.6.0-cdh5.14.0/ node03:$PWD
Configure the Hadoop environment variables
[root@node01 profile.d]# vim /etc/profile.d/hadoop.sh
export HADOOP_HOME=/export/servers/hadoop-2.6.0-cdh5.14.0
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
Apply the changes:
[root@node01 profile.d]# source /etc/profile
All three machines need the Hadoop environment variables, so copy the file to the other nodes:
[root@node01 profile.d]# scp hadoop.sh node02:$PWD
[root@node01 profile.d]# scp hadoop.sh node03:$PWD
Apply the changes on each node:
[root@node02 profile.d]# source /etc/profile
[root@node03 profile.d]# source /etc/profile
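As a quick check that the environment variables are in effect, the hadoop command itself can be run on each of the three machines:
[root@node01 profile.d]# hadoop version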
Start the cluster
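Before the very first startup, the NameNode must be formatted on node01 (run this only once; reformatting later would wipe the HDFS metadata):
[root@node01 ~]# hdfs namenode -format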
[root@node01 sbin]# cd /export/servers/hadoop-2.6.0-cdh5.14.0/sbin
[root@node01 sbin]# start-all.sh
Check whether the cluster is running
Method 1: jps should show the daemons on each node (on node01: NameNode, SecondaryNameNode, DataNode, ResourceManager and NodeManager; node02 and node03 only run DataNode and NodeManager)
[root@node01 sbin]# jps
Method 2:
HDFS web UI: http://192.168.100.21:50070/dfshealth.html#tab-overview
Method 3:
YARN web UI: http://192.168.100.21:8088/cluster
Stop the cluster
[root@node01 sbin]# cd /export/servers/hadoop-2.6.0-cdh5.14.0/sbin
[root@node01 sbin]# stop-all.sh