1.安装前准备
1.1 安装jdk,配置jdk环境变量
vim /etc/profile
# jdk setting
export JAVA_HOME=/data/base/jdk1.8.0_211
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=.:$CLASSPATH:$JAVA_HOME/lib:$JRE_HOME/lib
export PATH=$PATH:$JAVA_HOME/bin:$JRE_HOME/bin
source /etc/profile
1.2 关闭防火墙
systemctl stop firewalld #关闭防火墙
systemctl disable firewalld #禁用防火墙
1.3 设置主机名
hostname master #设置临时主机名
vim /etc/hostname #设置永久主机名
1.4 设置hosts
vim /etc/hosts
10.16.60.62 master
1.5 配置免密登录
ssh-keygen -t rsa # 生成公钥
#将生成的公钥添加到免密登录keys中(主节点)
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys # sshd要求authorized_keys权限为600,否则免密登录不生效
1.6 配置时钟同步
yum -y install ntpdate
ntpdate -u ntp.aliyun.com
# 配置定时任务
vi /etc/crontab
*/10 * * * * root /usr/sbin/ntpdate -u ntp.aliyun.com # /etc/crontab格式需在时间字段后指定执行用户
2.安装hadoop
2.1下载tar包
wget https://mirrors.tuna.tsinghua.edu.cn/apache/hadoop/common/hadoop-3.3.2/hadoop-3.3.2.tar.gz --no-check-certificate
tar -zxvf hadoop-3.3.2.tar.gz
安装目录:/data/cmpt/hadoop-3.3.2
2.2 配置hadoop环境变量
vim /etc/profile
export HADOOP_HOME=/data/cmpt/hadoop-3.3.2
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin # 否则2.9步的hdfs命令找不到
source /etc/profile
2.3 配置etc/hadoop/hadoop-env.sh
cd /data/cmpt/hadoop-3.3.2/etc/hadoop
vim hadoop-env.sh
export JAVA_HOME=/data/base/jdk1.8.0_211
export HADOOP_HOME=/data/cmpt/hadoop-3.3.2 # 不能留空,下面三个变量依赖该值展开
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HADOOP_LOG_DIR=${HADOOP_HOME}/logs
export HADOOP_PID_DIR=${HADOOP_HOME}/pid
2.4 配置etc/hadoop/core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:8020</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/tmp/hadoop</value>
</property>
</configuration>
2.5 配置etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>master:50090</value>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/data/db/hadoop/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/data/db/hadoop/dfs/data</value>
</property>
</configuration>
2.6 配置etc/hadoop/mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=/data/cmpt/hadoop-3.3.2</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=/data/cmpt/hadoop-3.3.2</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=/data/cmpt/hadoop-3.3.2</value>
</property>
</configuration>
2.7 配置etc/hadoop/yarn-site.xml
<configuration>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.env-whitelist</name>
<value>JAVA_HOME,HADOOP_COMMON_HOME,HADOOP_HDFS_HOME,HADOOP_CONF_DIR,CLASSPATH_PREPEND_DISTCACHE,HADOOP_YARN_HOME,HADOOP_MAPRED_HOME</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log.server.url</name>
<value>http://master:19888/jobhistory/logs/</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>4096</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>8</value>
</property>
<!-- 分配给每个Container(容器)的最小执行内存 -->
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>128</value>
</property>
</configuration>
2.8 修改启动脚本
vim start-dfs.sh
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
vim stop-dfs.sh
HDFS_DATANODE_USER=root
HDFS_DATANODE_SECURE_USER=hdfs
HDFS_NAMENODE_USER=root
HDFS_SECONDARYNAMENODE_USER=root
vim start-yarn.sh
YARN_RESOURCEMANAGER_USER=root
# HADOOP_SECURE_DN_USER=yarn # 该变量已弃用且属于HDFS datanode,非Kerberos安全模式下无需在yarn脚本中设置
YARN_NODEMANAGER_USER=root
vim stop-yarn.sh
YARN_RESOURCEMANAGER_USER=root
# HADOOP_SECURE_DN_USER=yarn # 该变量已弃用且属于HDFS datanode,非Kerberos安全模式下无需在yarn脚本中设置
YARN_NODEMANAGER_USER=root
2.9 格式化namenode
hdfs namenode -format
common.Storage: Storage directory /data/db/hadoop/dfs/name has been successfully formatted.
3. 启动
cd /data/cmpt/hadoop-3.3.2/sbin
./start-all.sh
./stop-all.sh
cd /data/cmpt/hadoop-3.3.2/bin
./mapred --daemon start historyserver
# 返回信息
[root@master bin]# jps
16229 ResourceManager
16405 NodeManager
61493 Jps
61303 JobHistoryServer
15001 NameNode
15546 SecondaryNameNode
15195 DataNode
4. 查看信息
# hdfs存储信息
http://10.16.60.62:9870/
# yarn资源信息
http://10.16.60.62:8088/
# yarn 应用列表
[root@master bin]# yarn app --list
2023-08-02 16:33:34,279 INFO client.DefaultNoHARMFailoverProxyProvider: Connecting to ResourceManager at master/10.16.60.62:8032
Total number of applications (application-types: [], states: [SUBMITTED, ACCEPTED, RUNNING] and tags: []):0
Application-Id Application-Name Application-Type User Queue State Final-State