1. 添加hadoop用户
[root@localhost ~]# useradd hadoop
[root@localhost ~]# passwd hadoop
Changing password for user hadoop.
New password:
BAD PASSWORD: The password is shorter than 8 characters
Retype new password:
passwd: all authentication tokens updated successfully.
2.修改hostname 分别修改
[root@localhost ~]# hostnamectl set-hostname hmaster
在另外两台主机上分别执行:hostnamectl set-hostname hslave1 和 hostnamectl set-hostname hslave2
3.配置ssh免密码登录
注意:应先切换到hadoop用户(su - hadoop)再执行,否则密钥会生成在/root/.ssh而非/home/hadoop/.ssh;下文示例提示符显示为root,实际操作时应为hadoop用户
在hmaster、hslave1、hslave2三台主机上都执行 ssh-keygen -t rsa 生成密钥
[root@localhost ~]# ssh-keygen -t rsa
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa):
Created directory '/root/.ssh'.
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /root/.ssh/id_rsa.
Your public key has been saved in /root/.ssh/id_rsa.pub.
The key fingerprint is:
SHA256:OF0zlA3rQTykeuZuaRafnfxEJzbj0lo+ifIPuD2OywE root@localhost.localdomain
The key's randomart image is:
+---[RSA 2048]----+
| o=+ |
| ++.. |
| . *. |
| + o + |
| + E . * .|
| =.. . = = |
| .+o+oo=. |
| .=.+=*Bo |
| +. ===++. |
+----[SHA256]-----+
在每台机子主机建立authorized_keys
[root@localhost .ssh]# ssh-copy-id -i id_rsa.pub hmaster
[root@localhost .ssh]# ssh-copy-id -i id_rsa.pub hslave1
[root@localhost .ssh]# ssh-copy-id -i id_rsa.pub hslave2
cat id_rsa.pub >>./authorized_keys
复制内容到authorized_keys
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCgQUUP8RMnwZBTQ3lLFYLjK2kRZNmvwsODDxowL2ictDMvwcx6WcdAaqe+Bip2t/WTTz1cMuOc7AJVAO3gFxH4j8xgaqdYXjlDpukLXSwed4F/oQh9PJ0hFxT97SzaAL0XoQkbP0jWsFDTjtiKg8PnbncPTxGFWheyEvKmczxBQkz7ZVycnjFnB7YOxdLoeiyrxuic+w4XrMzR/npoZbVweVhLNMq1bnz55Sm1VTfZ7TR+O/nZtEieBb/D1akmaHzW2pmCh4he8immrF+n7Ptzmk7sts0QOxKvrhazwR1l0heP8azksygtwX1kC5c5SYXPsGkIEEFTAKNUTesp3NZH root@hmaster
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDKEIULfs5XcQr5khs3jqJCO346npeWVAepaDyAuCU+LQXcSs6ERdNZeBhuKDVjH0DL31Il9T3veU4v7d4+PLSfH0rUXumZqM9XrZPGv5dt1DP7FGCU9g5c5PCfAPMEmx4QczMNuwprASaELdUlhF8yGIaYsIm3ESer3fnssqFdG+qP1AYqJJ/Oz4Om2+HeopiUuYtcjf1gRAcP8FlBjvLCVxcyxQjBKksJvef3fZ1Ib0zH2b43/2KWzTJIzQZYJqdH1bv+V4M5zVkp/+tsz5/NdaUQV7ViYpStjxlJnoZA6s9vtPaBGPFFkIWKpNZBGjBGgFvjaGorKCv8PgpAVSi7 root@hslave1
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC7E/JHw7SJYwcnjrzeCMo/FSecihx0c4mmmkU2ebWuWZL+3aug/zZ8IqSmajhFp8U8rY2Vd0eT8E2GgtTWsb0Fq1WwGgXeXCJE44hf7+RxaUzSnhSz/IlsSCjyXqKVl3ZNXxvFjHTcOMa7AuYNIDkibXaytfcLkQS+8f6dpFQTwrAGTph2djTuB/emEmwV2w1l+TmxljgQ/xAydskpSWKcKMFHqCASXdhcn5ON7QD4y3yDXaeU6WJYhNECKoDlU7MOnUckS7C9s1spOL+Wfa+l16t3HU9g7osqLEVvi7xIoQev9R7QV5Zme/LgHFPHYZsA6rX9WFinU0cjDQqLESyv root@hslave2
[root@localhost .ssh]# ssh hmaster
[root@localhost .ssh]# ssh hslave1
[root@localhost .ssh]# ssh hslave2
hmaster
[root@hmaster .ssh]# cat id_rsa.pub >> authorized_keys
hslave1
[root@hslave1 .ssh]# scp id_rsa.pub hadoop@hmaster:/home/hadoop/.ssh/111
hslave2
[root@hslave2 .ssh]# scp id_rsa.pub hadoop@hmaster:/home/hadoop/.ssh/222
然后在hmaster上
cat 111 >> authorized_keys
cat 222 >> authorized_keys
然后在hmaster上执行
[root@hmaster .ssh]# scp authorized_keys hadoop@hslave1:/home/hadoop/.ssh/authorized_keys
[root@hmaster .ssh]# scp authorized_keys hadoop@hslave2:/home/hadoop/.ssh/authorized_keys
权限设置
chmod 700 .ssh
chmod 600 .ssh/*
4. 修改hosts文件
[root@localhost ~]# vi /etc/hosts
192.168.84.128 hmaster
192.168.84.131 hslave1
192.168.84.132 hslave2
5.关闭主机防火墙
# 关闭防火墙(CentOS 7 的服务名为 firewalld,而非 firewalls)
systemctl stop firewalld.service
# 禁止防火墙开机启动
systemctl disable firewalld.service
6.关闭 selinux
vi /etc/sysconfig/selinux
修改 SELINUX 值为disabled
getenforce 命令查看 selinux状态为disabled
7.安装jdk
8.安装hadoop
master slave1 slave2
NameNode ResourceManage
DataNode DataNode DataNode
NodeManager NodeManager NodeManager
HistoryServer SecondaryNameNode
切换用户 hadoop
切换到 cd /home/hadoop 目录下
tar -zxvf hadoop-2.8.5.tar.gz
修改 hadoop 解压缩后 etc目录下 hadoop-env.sh、mapred-env.sh、yarn-env.sh文件中的JDK路径
export JAVA_HOME=/usr/local/java/jdk1.8.0_91/
修改core-site.xml
<configuration>
<!-- 指定NameNode主机和hdfs端口 -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://hmaster:9000</value>
</property>
<!-- 指定tmp文件夹路径 -->
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/data/tmp</value>
</property>
</configuration>
[hadoop@hmaster ~]$ mkdir -p data/tmp
配置hdfs-site.xml
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<!--注释掉
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hslave2:50090</value>
</property>-->
<property>
<name>dfs.namenode.name.dir</name>
<value>/home/hadoop/data/dfs/namenode</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/home/hadoop/data/dfs/datanode</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
<description>need not permissions</description>
</property>
配置slaves
hmaster
hslave1
hslave2
配置 yarn-site.xml
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hmaster</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>106800</value>
</property>
配置mapred-site.xml
[hadoop@hmaster hadoop]$ cp mapred-site.xml.template mapred-site.xml
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>hmaster:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hmaster:19888</value>
</property>
scp -r /home/hadoop/hadoop-2.8.5 hadoop@hslave1:/home/hadoop/
scp -r /home/hadoop/data hadoop@hslave1:/home/hadoop/
scp -r /home/hadoop/hadoop-2.8.5 hadoop@hslave2:/home/hadoop/
scp -r /home/hadoop/data hadoop@hslave2:/home/hadoop/
[hadoop@hslave1 hadoop-2.8.5]$ mkdir logs
9.配置Hadoop 环境变量
su root
vi /etc/profile
添加如下配置:
#hadoop
export HADOOP_HOME=/home/hadoop/hadoop-2.8.5
export PATH=${JAVA_HOME}/bin:${MYSQL_HOME}/bin:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:$PATH
修改完成,保存后执行 source /etc/profile 命令,然后用scp命令,拷贝/etc/profile 文件至 hslave1 和 hslave2 主机(注意主机名与前文/etc/hosts中配置保持一致)。
scp /etc/profile hslave1:/etc/profile
scp /etc/profile hslave2:/etc/profile
同样在hslave1和hslave2主机上执行 source /etc/profile 重新加载配置信息。
10.启动集群
$HADOOP_HOME/bin/hdfs namenode -format
(注意:-format 前是半角连字符"-",不是中文破折号"–";格式化只需在首次启动集群前执行一次)
启动HDFS
sbin/start-dfs.sh
启动YARN(本文yarn-site.xml中已将ResourceManager配置在hmaster,直接在hmaster上执行即可)
$HADOOP_HOME/sbin/start-yarn.sh
# 若ResourceManager部署在其他节点(例如hslave1),则需先ssh登录该节点,再单独启动ResourceManager:
# ssh hslave1
# $HADOOP_HOME/sbin/yarn-daemon.sh start resourcemanager
启动日志服务器
[hadoop@master ~]$ $HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver
关闭(对应的停止脚本为 stop-*,而非 start-*)
stop-all.sh
或者
stop-dfs.sh
stop-yarn.sh
查看HDFS Web页面
地址为 NameNode 进程运行主机ip,端口为50070,http://192.168.84.128:50070 。
4.5 查看YARN Web页面
地址为ResourceManager 进程运行主机,http://192.168.84.128:8088
4.6 查看JobHistory Web 页面
地址为JobHistoryServer 进程运行主机ip,端口为19888,通过配置文件查看自己的JobHistory web端口,http://192.168.84.128:19888
问题
方法1:停掉集群,删除问题节点的data目录下的所有内容,即hdfs-site.xml文件中配置的 dfs.datanode.data.dir 目录,然后重新格式化namenode。
方法2:先停掉集群,然后将datanode节点数据目录下 current/VERSION 文件中的 clusterID 修改为与namenode的VERSION文件中的 clusterID 一致即可
5. 测试Job
5.1 准备mapreduce输入文件wc.input
[hadoop@master ~]$ cat wc.input
hadoop mapreduce hive
hbase spark storm
sqoop hadoop hive
spark hadoop
hdfs dfs -ls /input
5.2 在HDFS创建输入目录input
[hadoop@master ~]$ hdfs dfs -mkdir /input
5.3 将wc.input 上传到HDFS
[hadoop@master ~]$ hdfs dfs -put wc.input /input
5.4 运行hadoop自带的mapreduce Demo
[hadoop@master ~]$ yarn jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.8.5.jar wordcount /input/wc.input /output
因为/etc/profile文件中已配置过hadoop环境变量,所以可以直接运行yarn命令,如下所示为执行结果。
5.5 查看输出文件
[hadoop@master ~]$ hdfs dfs -ls /output/
[hadoop@master ~]$ hdfs dfs -cat /output/part-r-00000