1 安装Ubuntu18.04服务器
下载地址:https://ubuntu.com/download/server
2 配置Ubuntu服务器
(1) 网络配置
vim /etc/netplan/50-cloud-init.yaml
network:
  ethernets:
    ens33:
      dhcp4: no
      addresses: [192.168.30.110/24]
      gateway4: 192.168.30.2
      nameservers:
        addresses: [114.114.114.114, 8.8.8.8]
        search: [localdomain]
  version: 2
(2) 重启网络
Shell中输入:netplan apply
ifconfig -a 查看网络配置是否成功
(3) 修改主机名
vim /etc/hostname
(4) 修改hosts文件
vim /etc/hosts
(5) 安装SSH服务
apt-get install openssh-server openssh-client
3 克隆虚拟机
右键-->管理-->克隆-->创建完整克隆
4 配置克隆虚拟机
重复步骤2
5 配置SSH免密登录
每台机器都需要生成密钥:ssh-keygen
每台机器都需要将公钥复制到集群中的每台机器(包括本机):ssh-copy-id root@主机名
3 在master主机上配置Hadoop
(1) 上传Hadoop安装包到/usr/local
(2) 解压文件Hadoop安装包
进入到Hadoop存放的目录下:cd /usr/local
解压Hadoop安装包 tar -zvxf hadoop-3.1.2.tar.gz
(3) 修改hadoop-env.sh
1) vim /usr/local/hadoop-3.1.2/etc/hadoop/hadoop-env.sh
将文件中的export JAVA_HOME修改为服务器上的JDK安装路径,例如:export JAVA_HOME=/usr/local/jdk1.8.0_221
(4) 修改hdfs-site.xml
1) vim /usr/local/hadoop-3.1.2/etc/hadoop/hdfs-site.xml
<configuration>
<!--指定namenode元数据存放的路径-->
<property>
<name>dfs.namenode.name.dir</name>
<value>file:///usr/local/hadoop/hdfs/name</value>
<final>true</final>
</property>
<!--指定datanode的数据存放的路径-->
<property>
<name>dfs.datanode.data.dir</name>
<value>file:///usr/local/hadoop/hdfs/data</value>
<final>true</final>
</property>
<!--指定namenode的web ui监控端口-->
<property>
<name>dfs.http.address</name>
<value>master:9870</value>
</property>
<!--指定secondary namenode的web ui访问地址-->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>master:9890</value>
</property>
<!--指定副本数量-->
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<!--指定web端是否开启操作hdfs的权限-->
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<!--指定是否开启文件权限系统-->
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
</configuration>
(5) 修改mapred-site.xml
1) vim /usr/local/hadoop-3.1.2/etc/hadoop/mapred-site.xml
<configuration>
<!--指定mapreduce的运行框架平台-->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!--指定历史作业的内部通讯端口-->
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<!--指定历史作业的Web ui监控端口-->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>
/usr/local/hadoop-3.1.2/etc/hadoop,
/usr/local/hadoop-3.1.2/share/hadoop/common/lib/*,
/usr/local/hadoop-3.1.2/share/hadoop/common/*,
/usr/local/hadoop-3.1.2/share/hadoop/hdfs,
/usr/local/hadoop-3.1.2/share/hadoop/hdfs/lib/*,
/usr/local/hadoop-3.1.2/share/hadoop/hdfs/*,
/usr/local/hadoop-3.1.2/share/hadoop/mapreduce/lib/*,
/usr/local/hadoop-3.1.2/share/hadoop/mapreduce/*,
/usr/local/hadoop-3.1.2/share/hadoop/yarn,
/usr/local/hadoop-3.1.2/share/hadoop/yarn/lib/*,
/usr/local/hadoop-3.1.2/share/hadoop/yarn/*
</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>
/usr/local/hadoop-3.1.2/etc/hadoop,
/usr/local/hadoop-3.1.2/share/hadoop/common/lib/*,
/usr/local/hadoop-3.1.2/share/hadoop/common/*,
/usr/local/hadoop-3.1.2/share/hadoop/hdfs,
/usr/local/hadoop-3.1.2/share/hadoop/hdfs/lib/*,
/usr/local/hadoop-3.1.2/share/hadoop/hdfs/*,
/usr/local/hadoop-3.1.2/share/hadoop/mapreduce/lib/*,
/usr/local/hadoop-3.1.2/share/hadoop/mapreduce/*,
/usr/local/hadoop-3.1.2/share/hadoop/yarn,
/usr/local/hadoop-3.1.2/share/hadoop/yarn/lib/*,
/usr/local/hadoop-3.1.2/share/hadoop/yarn/*
</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>
/usr/local/hadoop-3.1.2/etc/hadoop,
/usr/local/hadoop-3.1.2/share/hadoop/common/lib/*,
/usr/local/hadoop-3.1.2/share/hadoop/common/*,
/usr/local/hadoop-3.1.2/share/hadoop/hdfs,
/usr/local/hadoop-3.1.2/share/hadoop/hdfs/lib/*,
/usr/local/hadoop-3.1.2/share/hadoop/hdfs/*,
/usr/local/hadoop-3.1.2/share/hadoop/mapreduce/lib/*,
/usr/local/hadoop-3.1.2/share/hadoop/mapreduce/*,
/usr/local/hadoop-3.1.2/share/hadoop/yarn,
/usr/local/hadoop-3.1.2/share/hadoop/yarn/lib/*,
/usr/local/hadoop-3.1.2/share/hadoop/yarn/*
</value>
</property>
<property>
<name>mapreduce.application.classpath</name>
<value>
/usr/local/hadoop-3.1.2/etc/hadoop,
/usr/local/hadoop-3.1.2/share/hadoop/common/lib/*,
/usr/local/hadoop-3.1.2/share/hadoop/common/*,
/usr/local/hadoop-3.1.2/share/hadoop/hdfs,
/usr/local/hadoop-3.1.2/share/hadoop/hdfs/lib/*,
/usr/local/hadoop-3.1.2/share/hadoop/hdfs/*,
/usr/local/hadoop-3.1.2/share/hadoop/mapreduce/lib/*,
/usr/local/hadoop-3.1.2/share/hadoop/mapreduce/*,
/usr/local/hadoop-3.1.2/share/hadoop/yarn,
/usr/local/hadoop-3.1.2/share/hadoop/yarn/lib/*,
/usr/local/hadoop-3.1.2/share/hadoop/yarn/*
</value>
</property>
</configuration>
(6) 修改 yarn-site.xml
1) vim /usr/local/hadoop-3.1.2/etc/hadoop/yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<!--指定yarn集群的主机名 -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<!--指定mapreduce使用shuffle过程-->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<!--指定rm超级管理员的访问端口-->
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>master:8033</value>
</property>
<!--指定rm的内部通讯端口-->
<property>
<name>yarn.resourcemanager.address</name>
<value>master:8032</value>
</property>
<!--指定rm的资源调度端口-->
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master:8031</value>
</property>
<!--指定调度队列的访问端口-->
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>master:8030</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>10240</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>1</value>
</property>
<!--指定web ui 的访问端口-->
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>master:8088</value>
</property>
<!--设置虚拟内存率,防止内存不足报错-->
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>2.5</value>
</property>
<property>
<name>yarn.application.classpath</name>
<value>/usr/local/hadoop-3.1.2/etc/hadoop:/usr/local/hadoop-3.1.2/share/hadoop/common/lib/*:/usr/local/hadoop-3.1.2/share/hadoop/common/*:/usr/local/hadoop-3.1.2/share/hadoop/hdfs:/usr/local/hadoop-3.1.2/share/hadoop/hdfs/lib/*:/usr/local/hadoop-3.1.2/share/hadoop/hdfs/*:/usr/local/hadoop-3.1.2/share/hadoop/mapreduce/lib/*:/usr/local/hadoop-3.1.2/share/hadoop/mapreduce/*:/usr/local/hadoop-3.1.2/share/hadoop/yarn:/usr/local/hadoop-3.1.2/share/hadoop/yarn/lib/*:/usr/local/hadoop-3.1.2/share/hadoop/yarn/*</value>
</property>
</configuration>
(7) 修改core-site.xml
1) vim /usr/local/hadoop-3.1.2/etc/hadoop/core-site.xml
<configuration>
<!--指定Hadoop运行时产生的文件存放地址-->
<property>
<name>hadoop.tmp.dir</name>
<value>/usr/local/hadoop/hdfs/tmp</value>
<description>A base for other temporary directories.</description>
</property>
<!--指定buffer的大小-->
<property>
<name>io.file.buffer.size</name>
<value>102400</value>
</property>
<!--指定hadoop所使用的文件系统scheme(URI),即hdfs的根节点地址-->
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
</property>
</configuration>
(8) 修改 workers
1) vim /usr/local/hadoop-3.1.2/etc/hadoop/workers
master
hadoop01
hadoop02
hadoop03
3 修改Hadoop启动脚本(Hadoop 3.X之后以root用户启动会报错,需要修改启动脚本)
(1) 修改 start-dfs.sh stop-dfs.sh
1) vim /usr/local/hadoop-3.1.2/sbin/start-dfs.sh
vim /usr/local/hadoop-3.1.2/sbin/stop-dfs.sh
2) 在脚本最前面添加如下参数:
HDFS_DATANODE_USER=root
HADOOP_SECURE_DN_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
(2) 修改start-yarn.sh stop-yarn.sh
1) vim /usr/local/hadoop-3.1.2/sbin/start-yarn.sh
vim /usr/local/hadoop-3.1.2/sbin/stop-yarn.sh
2) 在脚本最前面添加如下参数
YARN_RESOURCEMANAGER_USER=root
HADOOP_SECURE_DN_USER=yarn
YARN_NODEMANAGER_USER=root
4 添加Hadoop到系统环境变量中
(1)vim /etc/profile
#设置Java环境
export JAVA_HOME=/usr/local/jdk1.8.0_221
export JRE_HOME=${JAVA_HOME}/jre
export HADOOP=/usr/local/hadoop-3.1.2
export CLASSPATH=.:${JAVA_HOME}/lib:${JRE_HOME}/lib
export PATH=${JAVA_HOME}/bin:$PATH
export PATH=$PATH:${HADOOP}/bin:${HADOOP}/sbin
export ZOOKEEPER_HOME=/usr/local/zookeeper-3.5.5
export PATH=${ZOOKEEPER_HOME}/bin:$PATH
export HIVE_HOME=/usr/local/hive-2.3.6
export PATH=$PATH:${HIVE_HOME}/bin
export HBASE_HOME=/usr/local/hbase-2.2.2
export PATH=$PATH:${HBASE_HOME}/bin
(2) 让配置文件生效
source /etc/profile
5 启动Hadoop集群
(1) 初始化Name Node节点
hdfs namenode -format
(2) 启动HDFS集群
start-dfs.sh
(3) 验证节点
jps
DataNode
Jps
SecondaryNameNode
NameNode
(4) 查看WebUI 界面
浏览器中输入: 服务器IP:9870
Hadoop 3.X之后HDFS的Web UI默认端口为9870,2.X为50070
(5) 启动YARN集群
start-yarn.sh
1) 节点验证
jps
Jps
DataNode
ResourceManager
SecondaryNameNode
NameNode
NodeManager
2) 查看WebUI界面
浏览器中输入: 服务器IP:8088
(6) 批量启动/停止
start-all.sh
stop-all.sh