一、基础配置
关闭防火墙及SELinux(所有服务器)
[root@master ~]# systemctl stop firewalld
[root@master ~]# setenforce 0
setenforce: SELinux is disabled
[root@master ~]# vi /etc/hosts (添加映射)
192.168.200.98 master
192.168.200.99 slave1
192.168.200.100 slave2
192.168.200.110 slave3
配置ssh免密
[root@master ~]# ssh-keygen -t rsa (一路回车)
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa):
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /root/.ssh/id_rsa.
#####部分省略
[root@master ~]# ssh-copy-id master
[root@master ~]# ssh-copy-id slave1
[root@master ~]# ssh-copy-id slave2
[root@master ~]# ssh-copy-id slave3
[root@master ~]# scp /etc/hosts root@192.168.200.99:/etc/
[root@master ~]# scp /etc/hosts root@192.168.200.100:/etc/
[root@master ~]# scp /etc/hosts root@192.168.200.110:/etc/
二、安装服务过程(全分布)
安装JDK
[root@master opt]# ll
-rw-r--r-- 1 root root 212046774 6月 29 2019 hadoop-2.7.2.tar.gz
-rw-r--r-- 1 root root 185540433 6月 29 2019 jdk-8u131-linux-x64.tar.gz
[root@master opt]# tar xf jdk-8u131-linux-x64.tar.gz
[root@master opt]# vi /etc/profile
#JAVA
export JAVA_HOME=/opt/jdk1.8.0_131
export PATH=$PATH:$JAVA_HOME/bin
[root@master opt]# source !$
[root@master opt]# java -version
java version "1.8.0_131"
Java(TM) SE Runtime Environment (build 1.8.0_131-b11)
Java HotSpot(TM) 64-Bit Server VM (build 25.131-b11, mixed mode)
####相同步骤复制到各节点,部分内容省略
[root@master opt]# scp -r jdk1.8.0_131/ slave1:/opt/
[root@master opt]# scp /etc/profile slave1:/etc/
安装Hadoop
[root@master opt]# tar xf hadoop-2.7.2.tar.gz
[root@master opt]# cd hadoop-2.7.2/etc/hadoop/
[root@master hadoop]# vi core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:8020</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>4096</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/hadoop-2.7.2/data</value>
</property>
<property>
<name>hadoop.http.staticuser.user</name>
<value>root</value>
</property>
</configuration>
修改hdfs-site.xml
[root@master hadoop]# vi hdfs-site.xml
<configuration>
<property> <!-- 配置副本因子 -->
<name>dfs.replication</name>
<value>3</value>
</property>
<property> <!-- 配置块大小 -->
<name>dfs.block.size</name>
<value>134217728</value>
</property>
<property> <!-- 配置元数据的存储位置 -->
<name>dfs.namenode.name.dir</name>
<value>file:///opt/data/dfs/name</value>
</property>
<property> <!-- 配置datanode数据存放位置 -->
<name>dfs.datanode.data.dir</name>
<value>/opt/data/dfs/data</value>
</property>
<property> <!-- 配置SecondaryNameNode检查点(checkpoint)目录存放位置 -->
<name>fs.checkpoint.dir</name>
<value>/opt/data/checkpoint/dfs/cname</value>
</property>
<property> <!-- 配置hdfs的namenode的web ui地址 -->
<name>dfs.http.address</name>
<value>master:50070</value>
</property>
<property> <!-- 配置dfs的SNN(SecondaryNameNode)的web ui地址 -->
<name>dfs.secondary.http.address</name>
<value>slave1:50090</value>
</property>
<property> <!-- 是否开启web操作dfs -->
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property> <!-- 是否启用hdfs的权限 -->
<name>dfs.permissions</name>
<value>false</value>
</property>
</configuration>
修改mapred-site.xml
[root@master hadoop]# vi mapred-site.xml
<configuration>
<property> <!-- 指定mapreduce运行框架 -->
<name>mapreduce.framework.name</name>
<value>yarn</value>
<final>true</final>
</property>
<property> <!-- 历史服务的通信地址 -->
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<property> <!-- 历史服务的web ui通信地址 -->
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
</configuration>
修改yarn-site.xml
[root@master hadoop]# vi yarn-site.xml
<configuration>
<property> <!-- 指定resourcemanager所在的主机名 -->
<name>yarn.resourcemanager.hostname</name>
<value>master</value>
</property>
<property> <!-- 指定mapreduce的shuffle -->
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property> <!-- 指定resourcemanager内部通信地址 -->
<name>yarn.resourcemanager.address</name>
<value>master:8032</value>
</property>
<property> <!-- 指定scheduler的内部通信地址 -->
<name>yarn.resourcemanager.scheduler.address</name>
<value>master:8030</value>
</property>
<property> <!-- 指定rm的resource-tracker的内部通信地址 -->
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master:8031</value>
</property>
<property> <!-- 指定rm的admin的内部通信地址 -->
<name>yarn.resourcemanager.admin.address</name>
<value>master:8033</value>
</property>
<property> <!-- 指定rm的web ui地址 -->
<name>yarn.resourcemanager.webapp.address</name>
<value>master:8088</value>
</property>
<property> <!-- 开启日志聚集功能 -->
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property> <!-- 设置日志聚集服务器地址 -->
<name>yarn.log.server.url</name>
<value>http://master:19888/jobhistory/logs</value>
</property>
<property> <!-- 设置日志保留时间为 7 天 -->
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
</configuration>
修改hadoop-env.sh
[root@master hadoop]# vi hadoop-env.sh
export JAVA_HOME=/opt/jdk1.8.0_131
添加节点信息,可写主机名
[root@master hadoop]# vi slaves
192.168.200.98
192.168.200.99
192.168.200.100
192.168.200.110
设置Hadoop变量
[root@master hadoop]# vi /etc/profile
#HADOOP
export HADOOP_HOME=/opt/hadoop-2.7.2
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$JAVA_HOME/bin:$PATH
将修改完配置的Hadoop拷贝到Slave节点
[root@master opt]# scp -r hadoop-2.7.2 slave1:/opt/
[root@master opt]# scp -r hadoop-2.7.2 slave2:/opt/
[root@master opt]# scp -r hadoop-2.7.2 slave3:/opt/
格式化Namenode
[root@master hadoop]# /opt/hadoop-2.7.2/bin/hdfs namenode -format
#出现这条说明格式化成功
**INFO common.Storage: Storage directory /opt/data/dfs/name has been successfully formatted.**
部分省略.......
三、启动方式
1、全启动
[root@master sbin]# ./start-all.sh
This script is Deprecated. Instead use start-dfs.sh and start-yarn.sh
Starting namenodes on [master]
master: starting namenode, logging to /opt/hadoop-2.7.2/logs/hadoop-root-namenode-master.out
192.168.200.98: starting datanode, logging to /opt/hadoop-2.7.2/logs/hadoop-root-datanode-master.out
192.168.200.99: starting datanode, logging to /opt/hadoop-2.7.2/logs/hadoop-root-datanode-slave1.out
192.168.200.100: starting datanode, logging to /opt/hadoop-2.7.2/logs/hadoop-root-datanode-slave2.out
192.168.200.110: starting datanode, logging to /opt/hadoop-2.7.2/logs/hadoop-root-datanode-slave3.out
Starting secondary namenodes [slave1]
slave1: starting secondarynamenode, logging to /opt/hadoop-2.7.2/logs/hadoop-root-secondarynamenode-slave1.out
starting yarn daemons
starting resourcemanager, logging to /opt/hadoop-2.7.2/logs/yarn-root-resourcemanager-master.out
192.168.200.98: starting nodemanager, logging to /opt/hadoop-2.7.2/logs/yarn-root-nodemanager-master.out
192.168.200.99: starting nodemanager, logging to /opt/hadoop-2.7.2/logs/yarn-root-nodemanager-slave1.out
192.168.200.110: starting nodemanager, logging to /opt/hadoop-2.7.2/logs/yarn-root-nodemanager-slave3.out
192.168.200.100: starting nodemanager, logging to /opt/hadoop-2.7.2/logs/yarn-root-nodemanager-slave2.out
2、模式启动
[root@master sbin]# ./start-dfs.sh
[root@master sbin]# ./start-yarn.sh
3、进程启动
[root@master sbin]# hadoop-daemon.sh start namenode
[root@master sbin]# hadoop-daemons.sh start datanode
[root@master sbin]# yarn-daemon.sh start resourcemanager
[root@master sbin]# yarn-daemons.sh start nodemanager
四、测试
访问HDFS 50070页面
访问YARN 8088 页面
五、命令行
1、HDFS
[root@master hadoop]# hadoop fs -mkdir /test #HDFS创建目录
[root@master hadoop]# hadoop fs -ls /
Found 1 items
drwxr-xr-x - root supergroup 0 2022-12-09 00:00 /test
[root@master hadoop]# hadoop fs -put /opt/hadoop-2.7.2/etc/hadoop/core-site.xml /test/ #上传文件至HDFS
[root@master hadoop]# hadoop fs -ls /test/
Found 1 items
-rw-r--r-- 3 root supergroup 441 2022-12-09 00:01 /test/core-site.xml
[root@master hadoop]# hadoop fs -du -s -h /test/core-site.xml #查看文件占用
441 /test/core-site.xml
命令行操作可参照官方地址:https://hadoop.apache.org/docs/r1.0.4/cn/commands_manual.html
2、YARN
[root@master hadoop]# yarn application -list #查看任务
22/12/09 00:05:40 INFO client.RMProxy: Connecting to ResourceManager at master/192.168.200.98:8032
Total number of applications (application-types: [] and states: [SUBMITTED, ACCEPTED, RUNNING]):0
Application-Id Application-Name Application-Type User Queue State Final-State Progress Tracking-URL
[root@master hadoop]# yarn node -list -all #查看节点状态
22/12/09 00:09:33 INFO client.RMProxy: Connecting to ResourceManager at master/192.168.200.98:8032
Total Nodes:4
Node-Id Node-State Node-Http-Address Number-of-Running-Containers
slave3:36736 RUNNING slave3:8042 0
slave2:33586 RUNNING slave2:8042 0
master:35013 RUNNING master:8042 0
slave1:41883 RUNNING slave1:8042 0
**查看程序状态**
yarn application -list -appStates FINISHED
根据Application状态过滤:yarn application -list -appStates (所有状态:ALL、NEW、NEW_SAVING、SUBMITTED、ACCEPTED、RUNNING、FINISHED、FAILED、KILLED)
**Kill掉Application**
yarn application -kill application_1612577921195_0001
**查看任务日志**
例:查询Application日志:yarn logs -applicationId <ApplicationId>
yarn logs -applicationId application_1612577921195_0001
**查看所有容器**
列出所有Container:yarn container -list <ApplicationAttemptId>
yarn container -list appattempt_1612577921195_0001_000001
**查看容器状态**
打印Container状态: yarn container -status <ContainerId>
yarn container -status container_1612577921195_0001_01_000001