I. Hadoop Platform Installation
1. Configure the basic Linux system environment
(1) Check the server's IP address
[root@localhost ~]# ip add show
(2) Set the server's hostname
[root@localhost ~]# hostnamectl set-hostname master
[root@localhost ~]# bash
[root@master ~]# hostname
master
(3) Bind the hostname to the IP address
[root@master ~]# vi /etc/hosts
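A minimal sketch of the mapping to append in /etc/hosts, assuming the 192.168.47.140 address that this document later assigns to master (substitute your own server's IP from the step above):
192.168.47.140 master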
(4) Check the SSH service status
[root@master ~]# systemctl status sshd
(5) Disable the firewall
[root@master ~]# systemctl stop firewalld
[root@master ~]# systemctl status firewalld
[root@master ~]# systemctl disable firewalld
(6) Create the hadoop user
[root@master ~]# useradd hadoop
[root@master ~]# echo "1" | passwd --stdin hadoop
2. Install the Java environment
(1) Download the JDK installation package
(2) Remove the bundled OpenJDK
[root@master ~]# rpm -qa | grep java
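If the query lists bundled OpenJDK packages, they can be removed before installing the Oracle JDK. A hedged sketch using rpm -e --nodeps; the package names below are typical CentOS 7 defaults and are assumptions, so substitute the exact names printed by the query above:
[root@master ~]# rpm -e --nodeps java-1.8.0-openjdk java-1.8.0-openjdk-headless
Re-running the query should then return nothing.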
[root@master ~]# rpm -qa | grep java
[root@master ~]# java -version
(3) Install the JDK
[root@master ~]# tar -zxvf /opt/software/jdk-8u152-linux-x64.tar.gz -C /usr/local/src/
[root@master ~]# ls /usr/local/src/
jdk1.8.0_152
(4) Set the Java environment variables
[root@master ~]# vi /etc/profile
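Append the following two lines at the end of /etc/profile; they match the JDK path extracted above and are the same lines this document later adds on the slave nodes:
export JAVA_HOME=/usr/local/src/jdk1.8.0_152
export PATH=$PATH:$JAVA_HOME/bin
Run source /etc/profile afterwards so the new variables take effect, then verify with java -version.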
3. Install the Hadoop software
(1) Install the Hadoop software
[root@master ~]# tar -zxvf /opt/software/hadoop-2.7.1.tar.gz -C /usr/local/src/
[root@master ~]# ll /usr/local/src/
[root@master ~]# ll /usr/local/src/hadoop-2.7.1/
(2) Configure the Hadoop environment variables
[root@master ~]# vi /etc/profile
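A sketch of the Hadoop lines to append to /etc/profile at this stage. The extracted directory is still named hadoop-2.7.1 here; Part II later renames it to hadoop and sets HADOOP_HOME accordingly, so adjust the path to whichever name you are using:
export HADOOP_HOME=/usr/local/src/hadoop-2.7.1
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin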
[root@master ~]# source /etc/profile
[root@master ~]# hadoop
(3) Change the owner and group of the directory
[root@master ~]# chown -R hadoop:hadoop /usr/local/src/
[root@master ~]# ll /usr/local/src/
4. Install the standalone Hadoop system
(1) Switch to the hadoop user
[root@master hadoop-2.7.1]# su - hadoop
[hadoop@master ~]$ id
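The output should show that commands now run as the hadoop user, for example (the numeric uid/gid values depend on your system and are only illustrative):
uid=1000(hadoop) gid=1000(hadoop) groups=1000(hadoop)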
(2) Create the input data directory
[hadoop@master ~]$ mkdir ~/input
[hadoop@master ~]$ ls
(3) Create the input data file
[hadoop@master ~]$ vi input/data.txt
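For this walkthrough data.txt only needs a few test lines; the content below is the sample used later when the same file is copied into HDFS:
Hello World
Hello Hadoop
Hello Huasan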
(4) Test a MapReduce run
[hadoop@master ~]$ hadoop jar /usr/local/src/hadoop-2.7.1/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar wordcount ~/input/data.txt ~/output
[hadoop@master ~]$ ll output/
[hadoop@master ~]$ cat output/part-r-00000
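Assuming the three-line data.txt above, the word counts written to part-r-00000 should be:
Hadoop	1
Hello	3
Huasan	1
World	1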
5. Hadoop platform environment configuration
Change the hostname of the slave1 machine
[root@localhost ~]# hostnamectl set-hostname slave1
[root@localhost ~]# bash
[root@slave1 ~]#
Change the hostname of the slave2 machine
[root@localhost ~]# hostnamectl set-hostname slave2
[root@localhost ~]# bash
[root@slave2 ~]#
[root@master ~]# vi /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.47.140 master
192.168.47.141 slave1
192.168.47.142 slave2
[root@slave1 ~]# vi /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.47.140 master
192.168.47.141 slave1
192.168.47.142 slave2
[root@slave2 ~]# vi /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.47.140 master
192.168.47.141 slave1
192.168.47.142 slave2
6. Generate SSH keys
(1) Install and enable the SSH service on every node
[root@master ~]# rpm -qa | grep openssh
openssh-server-7.4p1-11.el7.x86_64
openssh-7.4p1-11.el7.x86_64
openssh-clients-7.4p1-11.el7.x86_64
[root@master ~]# rpm -qa | grep rsync
rsync-3.1.2-11.el7_9.x86_64
(2) Switch to the hadoop user
[root@master ~]# su - hadoop
[hadoop@master ~]$
[root@slave1 ~]# useradd hadoop
[root@slave1 ~]# su - hadoop
[hadoop@slave1 ~]$
[root@slave2 ~]# useradd hadoop
[root@slave2 ~]# su - hadoop
[hadoop@slave2 ~]$
(3) Generate a key pair on every node
[hadoop@master ~]$ ssh-keygen -t rsa
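Press Enter at each ssh-keygen prompt to accept the default key location (/home/hadoop/.ssh/id_rsa) and an empty passphrase, so that later SSH logins between nodes do not ask for a password.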
(4) Check that the "/home/hadoop/" directory contains a ".ssh" folder and that it holds the two newly generated passphrase-less key files
[hadoop@master ~]$ ls ~/.ssh/
id_rsa id_rsa.pub
(5) Append id_rsa.pub to the authorized keys file
#master
[hadoop@master ~]$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
[hadoop@master ~]$ ls ~/.ssh/
authorized_keys id_rsa id_rsa.pub
#slave1
[hadoop@slave1 ~]$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
[hadoop@slave1 ~]$ ls ~/.ssh/
authorized_keys id_rsa id_rsa.pub
#slave2
[hadoop@slave2 ~]$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
[hadoop@slave2 ~]$ ls ~/.ssh/
authorized_keys id_rsa id_rsa.pub
(6) Change the permissions of the "authorized_keys" file
#master
[hadoop@master ~]$ chmod 600 ~/.ssh/authorized_keys
[hadoop@master ~]$ ll ~/.ssh/
total 12
-rw-------. 1 hadoop hadoop  395 Nov 14 16:18 authorized_keys
-rw-------. 1 hadoop hadoop 1679 Nov 14 16:14 id_rsa
-rw-r--r--. 1 hadoop hadoop  395 Nov 14 16:14 id_rsa.pub
#slave1
[hadoop@slave1 ~]$ chmod 600 ~/.ssh/authorized_keys
[hadoop@slave1 ~]$ ll ~/.ssh/
total 12
-rw-------. 1 hadoop hadoop  395 Nov 14 16:18 authorized_keys
-rw-------. 1 hadoop hadoop 1675 Nov 14 16:14 id_rsa
-rw-r--r--. 1 hadoop hadoop  395 Nov 14 16:14 id_rsa.pub
#slave2
[hadoop@slave2 ~]$ chmod 600 ~/.ssh/authorized_keys
[hadoop@slave2 ~]$ ll ~/.ssh/
total 12
-rw-------. 1 hadoop hadoop  395 Nov 14 16:19 authorized_keys
-rw-------. 1 hadoop hadoop 1679 Nov 14 16:15 id_rsa
-rw-r--r--. 1 hadoop hadoop  395 Nov 14 16:15 id_rsa.pub
(7) Configure the SSH service
Run the following on master, slave1, and slave2 respectively:
[hadoop@master ~]$ su - root
Password:
Last login: Mon Nov 14 15:48:10 CST 2022 from 192.168.47.1 pts/1
[root@master ~]# vi /etc/ssh/sshd_config
PubkeyAuthentication yes  # find this line and remove the leading # comment marker
(8) Restart the SSH service
[root@master ~]# systemctl restart sshd
(9) Switch to the hadoop user
[root@master ~]# su - hadoop
(10) Verify SSH login to the local machine
[hadoop@master ~]$ ssh localhost
7. Exchange SSH keys
(1) Copy the Master node's public key id_rsa.pub to every Slave node
Log in as the hadoop user and copy the key with the scp command.
[hadoop@master ~]$ scp ~/.ssh/id_rsa.pub hadoop@slave1:~/
hadoop@slave1's password:
id_rsa.pub 100% 395 303.6KB/s 00:00
[hadoop@master ~]$ scp ~/.ssh/id_rsa.pub hadoop@slave2:~/
The authenticity of host 'slave2 (192.168.47.142)' can't be established.
ECDSA key fingerprint is SHA256:KvO9HlwdCTJLStOxZWN7qrfRr8FJvcEw2hzWAF9b3bQ.
ECDSA key fingerprint is MD5:07:91:56:9e:0b:55:05:05:58:02:15:5e:68:db:be:73.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added 'slave2,192.168.47.142' (ECDSA) to the list of known hosts.
hadoop@slave2's password:
id_rsa.pub 100% 395 131.6KB/s 00:00
(2) On each Slave node, append the public key copied from the Master node to the authorized_keys file
Log in to the slave1 and slave2 nodes as the hadoop user and run the following commands.
[hadoop@slave1 ~]$ cat ~/id_rsa.pub >>~/.ssh/authorized_keys
[hadoop@slave2 ~]$ cat ~/id_rsa.pub >>~/.ssh/authorized_keys
(3) Delete the id_rsa.pub file on each Slave node
[hadoop@slave1 ~]$ rm -rf ~/id_rsa.pub
[hadoop@slave2 ~]$ rm -rf ~/id_rsa.pub
(4) Copy each Slave node's public key to the Master
[hadoop@slave1 ~]$ scp ~/.ssh/id_rsa.pub hadoop@master:~/
[hadoop@slave2 ~]$ scp ~/.ssh/id_rsa.pub hadoop@master:~/
On the Master node, append the public key copied from a Slave node to the authorized_keys file. (Note: both Slave nodes copy to the same ~/id_rsa.pub path, so run the append and delete steps after each scp, otherwise the second copy overwrites the first.)
[hadoop@master ~]$ cat ~/id_rsa.pub >>~/.ssh/authorized_keys
Delete the id_rsa.pub file on the Master node.
[hadoop@master ~]$ rm -rf ~/id_rsa.pub
8. Verify passwordless SSH login
(1) View the Master node's authorized_keys file
[hadoop@master ~]$ cat ~/.ssh/authorized_keys
(2) View the Slave nodes' authorized_keys files
[hadoop@slave1 ~]$ cat ~/.ssh/authorized_keys
[hadoop@slave2 ~]$ cat ~/.ssh/authorized_keys
(3) Verify passwordless login from the Master to each Slave node
[hadoop@master ~]$ ssh slave1
Last login: Mon Nov 14 16:34:56 2022
[hadoop@slave1 ~]$
[hadoop@master ~]$ ssh slave2
Last login: Mon Nov 14 16:49:34 2022 from 192.168.47.140
[hadoop@slave2 ~]$
(4) Verify passwordless login from both Slave nodes to the Master node
[hadoop@slave1 ~]$ ssh master
Last login: Mon Nov 14 16:30:45 2022 from ::1
[hadoop@master ~]$
[hadoop@slave2 ~]$ ssh master
Last login: Mon Nov 14 16:50:49 2022 from 192.168.47.141
[hadoop@master ~]$
(5) Configure the JDK environment on the two slave nodes, slave1 and slave2
[root@master ~]# cd /usr/local/src/
[root@master src]# ls
hadoop-2.7.1 jdk1.8.0_152
[root@master src]# scp -r jdk1.8.0_152 root@slave1:/usr/local/src/
[root@master src]# scp -r jdk1.8.0_152 root@slave2:/usr/local/src/
#slave1
[root@slave1 ~]# ls /usr/local/src/
jdk1.8.0_152
[root@slave1 ~]# vi /etc/profile
# Add the following two lines at the end of this file
export JAVA_HOME=/usr/local/src/jdk1.8.0_152
export PATH=$PATH:$JAVA_HOME/bin
[root@slave1 ~]# source /etc/profile
[root@slave1 ~]# java -version
java version "1.8.0_152"
Java(TM) SE Runtime Environment (build 1.8.0_152-b16)
Java HotSpot(TM) 64-Bit Server VM (build 25.152-b16, mixed mode)
#slave2
[root@slave2 ~]# ls /usr/local/src/
jdk1.8.0_152
[root@slave2 ~]# vi /etc/profile
# Add the following two lines at the end of this file
export JAVA_HOME=/usr/local/src/jdk1.8.0_152
export PATH=$PATH:$JAVA_HOME/bin
[root@slave2 ~]# source /etc/profile
[root@slave2 ~]# java -version
java version "1.8.0_152"
Java(TM) SE Runtime Environment (build 1.8.0_152-b16)
Java HotSpot(TM) 64-Bit Server VM (build 25.152-b16, mixed mode)
II. Running the Hadoop Cluster
1. Hadoop configuration file parameters
Install Hadoop on the Master node.
(1) Rename the hadoop-2.7.1 directory to hadoop
[root@master ~]# cd /usr/local/src/
[root@master src]# mv hadoop-2.7.1 hadoop
[root@master src]# ls
hadoop jdk1.8.0_152
(2) Configure the Hadoop environment variables
[root@master src]# yum install -y vim
[root@master src]# vim /etc/profile
[root@master src]# tail -n 4 /etc/profile
export JAVA_HOME=/usr/local/src/jdk1.8.0_152
export PATH=$PATH:$JAVA_HOME/bin
export HADOOP_HOME=/usr/local/src/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
(3) Make the configured Hadoop environment variables take effect
[root@master src]# su - hadoop
Last login: Mon Feb 28 15:55:37 CST 2022 from 192.168.41.143 pts/1
[hadoop@master ~]$ source /etc/profile
[hadoop@master ~]$ exit
logout
(4) Run the following commands to modify the hadoop-env.sh configuration file
[root@master src]# cd /usr/local/src/hadoop/etc/hadoop/
[root@master hadoop]# vim hadoop-env.sh  # change the following setting
export JAVA_HOME=/usr/local/src/jdk1.8.0_152
2. Configure the hdfs-site.xml parameters
[root@master hadoop]# vim hdfs-site.xml  # edit the following content
[root@master hadoop]# tail -n 14 hdfs-site.xml
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/usr/local/src/hadoop/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/usr/local/src/hadoop/dfs/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
</configuration>
3. Configure the core-site.xml parameters
[root@master hadoop]# vim core-site.xml  # edit the following content
[root@master hadoop]# tail -n 14 core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://192.168.47.140:9000</value>
</property>
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>file:/usr/local/src/hadoop/tmp</value>
</property>
</configuration>
4. Configure mapred-site.xml
[root@master hadoop]# pwd
/usr/local/src/hadoop/etc/hadoop
[root@master hadoop]# cp mapred-site.xml.template mapred-site.xml
[root@master hadoop]# vim mapred-site.xml  # add the following configuration
[root@master hadoop]# tail -n 14 mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>master:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master:19888</value>
</property>
</configuration>
5. Configure yarn-site.xml
[root@master hadoop]# vim yarn-site.xml  # add the following configuration
[root@master hadoop]# tail -n 32 yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.resourcemanager.address</name>
<value>master:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address</name>
<value>master:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address</name>
<value>master:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address</name>
<value>master:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>master:8088</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
</configuration>
Other Hadoop-related configuration
1. Configure the masters file
[root@master hadoop]# vim masters
[root@master hadoop]# cat masters
192.168.47.140
2. Configure the slaves file
[root@master hadoop]# vim slaves
[root@master hadoop]# cat slaves
192.168.47.141
192.168.47.142
3. Create directories
[root@master hadoop]# mkdir /usr/local/src/hadoop/tmp
[root@master hadoop]# mkdir /usr/local/src/hadoop/dfs/name -p
[root@master hadoop]# mkdir /usr/local/src/hadoop/dfs/data -p
4. Change directory ownership
[root@master hadoop]# chown -R hadoop:hadoop /usr/local/src/hadoop/
5. Sync the configuration files to the Slave nodes
[root@master ~]# scp -r /usr/local/src/hadoop/ root@slave1:/usr/local/src/
[root@master ~]# scp -r /usr/local/src/hadoop/ root@slave2:/usr/local/src/
Configuration on slave1 and slave2
[root@slave1 ~]# yum install -y vim
[root@slave1 ~]# vim /etc/profile
[root@slave1 ~]# tail -n 4 /etc/profile
export JAVA_HOME=/usr/local/src/jdk1.8.0_152
export PATH=$PATH:$JAVA_HOME/bin
export HADOOP_HOME=/usr/local/src/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
[root@slave1 ~]# chown -R hadoop:hadoop /usr/local/src/hadoop/
[root@slave1 ~]# su - hadoop
Last login: Thu Feb 24 11:29:00 CST 2022 from 192.168.41.148 pts/1
[hadoop@slave1 ~]$ source /etc/profile
Repeat the same configuration steps on slave2.
Running the Big Data Platform Cluster
1. Running the Hadoop cluster
Format Hadoop
(1) Format the NameNode
[root@master ~]# su - hadoop
[hadoop@master ~]$ cd /usr/local/src/hadoop/
[hadoop@master hadoop]$ bin/hdfs namenode -format
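If formatting succeeds, the log should end with messages similar to the following (paraphrased; exact wording may vary with the Hadoop version):
INFO common.Storage: Storage directory /usr/local/src/hadoop/dfs/name has been successfully formatted.
INFO util.ExitUtil: Exiting with status 0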
(2) Start the NameNode
[hadoop@master hadoop]$ hadoop-daemon.sh start namenode
Task 2: View the Java processes
[hadoop@master hadoop]$ jps
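The output should list the NameNode process plus jps itself, for example (process IDs will differ on every run):
3380 NameNode
3445 Jps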
(1) Start the DataNode on the slave nodes
[hadoop@slave1 hadoop]$ hadoop-daemon.sh start datanode
[hadoop@slave2 hadoop]$ hadoop-daemon.sh start datanode
[hadoop@slave1 hadoop]$ jps
3557 DataNode
3725 Jps
[hadoop@slave2 hadoop]$ jps
3557 DataNode
3725 Jps
(2) Start the SecondaryNameNode
[hadoop@master hadoop]$ hadoop-daemon.sh start secondarynamenode
[hadoop@master hadoop]$ jps
(3) View where HDFS stores its data
[hadoop@master hadoop]$ ll dfs/
[hadoop@master hadoop]$ ll ./tmp/dfs
Task 3: View the HDFS report
[hadoop@master sbin]$ hdfs dfsadmin -report
Task 4: Check node status in a browser
Enter http://master:50070 in the browser's address bar; the page shows NameNode and DataNode information.
[hadoop@master hadoop]$ stop-dfs.sh
[hadoop@master hadoop]$ start-dfs.sh
(1) Create the data input directory in the HDFS file system
Make sure both dfs and yarn have started successfully.
[hadoop@master hadoop]$ start-yarn.sh
[hadoop@master hadoop]$ jps
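On the master node, jps should now show roughly the following processes (IDs will differ): NameNode, SecondaryNameNode, ResourceManager, and Jps; each slave node should additionally be running a DataNode and a NodeManager.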
[hadoop@master hadoop]$ hdfs dfs -mkdir /input
[hadoop@master hadoop]$ hdfs dfs -ls /
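The listing should contain an entry for the new directory owned by the hadoop user, similar to:
drwxr-xr-x   - hadoop supergroup          0 <creation time> /input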
(2) Copy the input data file into the HDFS /input directory
[hadoop@master hadoop]$ cat ~/input/data.txt
Hello World
Hello Hadoop
Hello Huasan
[hadoop@master hadoop]$ hdfs dfs -put ~/input/data.txt /input
[hadoop@master hadoop]$ hdfs dfs -ls /input
[hadoop@master hadoop]$ hdfs dfs -mkdir /output
[hadoop@master hadoop]$ hdfs dfs -ls /
[hadoop@master hadoop]$ hdfs dfs -rm -r -f /output
[hadoop@master hadoop]$ hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.1.jar wordcount /input/data.txt /output
[hadoop@master hadoop]$ hdfs dfs -cat /output/part-r-00000
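With the sample data.txt, the result should match the standalone run from Part I:
Hadoop	1
Hello	3
Huasan	1
World	1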
Task 5: Stop Hadoop
(1) Stop yarn
[hadoop@master hadoop]$ stop-yarn.sh
(2) Stop the DataNodes
[hadoop@slave1 hadoop]$ hadoop-daemon.sh stop datanode
[hadoop@slave2 hadoop]$ hadoop-daemon.sh stop datanode
(3) Stop the NameNode
[hadoop@master hadoop]$ hadoop-daemon.sh stop namenode
(4) Stop the SecondaryNameNode
[hadoop@master hadoop]$ hadoop-daemon.sh stop secondarynamenode
(5) View the Java processes and confirm that all HDFS processes have stopped
[hadoop@master hadoop]$ jps