Installing Hadoop on CentOS 7

1. Add the hadoop user
[root@localhost ~]# useradd hadoop
[root@localhost ~]# passwd hadoop
Changing password for user hadoop.
New password:
BAD PASSWORD: The password is shorter than 8 characters
Retype new password:
passwd: all authentication tokens updated successfully.

2. Set the hostname (do this on each host)
[root@localhost ~]# hostnamectl set-hostname hmaster
On the other two hosts, set the hostname to hslave1 and hslave2, as shown below.
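
For example (run each command on the corresponding host):

[root@localhost ~]# hostnamectl set-hostname hslave1     # run on the hslave1 host
[root@localhost ~]# hostnamectl set-hostname hslave2     # run on the hslave2 host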

3. Configure passwordless SSH login
    Do this as the hadoop user.
    Run ssh-keygen -t rsa on every host (hmaster, hslave1, hslave2) to generate a key pair:
    [root@localhost ~]# ssh-keygen -t rsa
Generating public/private rsa key pair.
Enter file in which to save the key (/root/.ssh/id_rsa):
Created directory '/root/.ssh'.
Enter passphrase (empty for no passphrase):
Enter same passphrase again:
Your identification has been saved in /root/.ssh/id_rsa.
Your public key has been saved in /root/.ssh/id_rsa.pub.
The key fingerprint is:
SHA256:OF0zlA3rQTykeuZuaRafnfxEJzbj0lo+ifIPuD2OywE root@localhost.localdomain
The key's randomart image is:
+---[RSA 2048]----+
|         o=+     |
|         ++..    |
|        . *.     |
|       + o +     |
|      + E .   * .|
|       =.. . = = |
|        .+o+oo=. |
|       .=.+=*Bo  |
|       +. ===++. |
+----[SHA256]-----+

Create authorized_keys on every host:
[root@localhost .ssh]# ssh-copy-id -i id_rsa.pub hmaster
[root@localhost .ssh]# ssh-copy-id -i id_rsa.pub hslave1
[root@localhost .ssh]# ssh-copy-id -i id_rsa.pub hslave2
cat id_rsa.pub >>./authorized_keys

Copy the contents into authorized_keys:
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCgQUUP8RMnwZBTQ3lLFYLjK2kRZNmvwsODDxowL2ictDMvwcx6WcdAaqe+Bip2t/WTTz1cMuOc7AJVAO3gFxH4j8xgaqdYXjlDpukLXSwed4F/oQh9PJ0hFxT97SzaAL0XoQkbP0jWsFDTjtiKg8PnbncPTxGFWheyEvKmczxBQkz7ZVycnjFnB7YOxdLoeiyrxuic+w4XrMzR/npoZbVweVhLNMq1bnz55Sm1VTfZ7TR+O/nZtEieBb/D1akmaHzW2pmCh4he8immrF+n7Ptzmk7sts0QOxKvrhazwR1l0heP8azksygtwX1kC5c5SYXPsGkIEEFTAKNUTesp3NZH root@hmaster
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQDKEIULfs5XcQr5khs3jqJCO346npeWVAepaDyAuCU+LQXcSs6ERdNZeBhuKDVjH0DL31Il9T3veU4v7d4+PLSfH0rUXumZqM9XrZPGv5dt1DP7FGCU9g5c5PCfAPMEmx4QczMNuwprASaELdUlhF8yGIaYsIm3ESer3fnssqFdG+qP1AYqJJ/Oz4Om2+HeopiUuYtcjf1gRAcP8FlBjvLCVxcyxQjBKksJvef3fZ1Ib0zH2b43/2KWzTJIzQZYJqdH1bv+V4M5zVkp/+tsz5/NdaUQV7ViYpStjxlJnoZA6s9vtPaBGPFFkIWKpNZBGjBGgFvjaGorKCv8PgpAVSi7 root@hslave1
ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQC7E/JHw7SJYwcnjrzeCMo/FSecihx0c4mmmkU2ebWuWZL+3aug/zZ8IqSmajhFp8U8rY2Vd0eT8E2GgtTWsb0Fq1WwGgXeXCJE44hf7+RxaUzSnhSz/IlsSCjyXqKVl3ZNXxvFjHTcOMa7AuYNIDkibXaytfcLkQS+8f6dpFQTwrAGTph2djTuB/emEmwV2w1l+TmxljgQ/xAydskpSWKcKMFHqCASXdhcn5ON7QD4y3yDXaeU6WJYhNECKoDlU7MOnUckS7C9s1spOL+Wfa+l16t3HU9g7osqLEVvi7xIoQev9R7QV5Zme/LgHFPHYZsA6rX9WFinU0cjDQqLESyv root@hslave2

[root@localhost .ssh]# ssh hmaster
[root@localhost .ssh]# ssh hslave1
[root@localhost .ssh]# ssh hslave2

On hmaster:
[root@hmaster .ssh]# cat id_rsa.pub >> authorized_keys

On hslave1:
[root@hslave1 .ssh]# scp id_rsa.pub hadoop@hmaster:/home/hadoop/.ssh/111
On hslave2:
[root@hslave2 .ssh]# scp id_rsa.pub hadoop@hmaster:/home/hadoop/.ssh/222

Then on hmaster:
cat 111 >> authorized_keys
cat 222 >> authorized_keys
and distribute the combined file from hmaster:
[root@hmaster .ssh]# scp authorized_keys hadoop@hslave1:/home/hadoop/.ssh/authorized_keys
[root@hmaster .ssh]# scp authorized_keys hadoop@hslave2:/home/hadoop/.ssh/authorized_keys

Set permissions
 chmod 700 .ssh
 chmod 600 .ssh/*
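
A quick check (a sketch, run from hmaster as the hadoop user): each command should print the hostname without asking for a password.

for h in hmaster hslave1 hslave2; do ssh $h hostname; done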

4. Edit the hosts file
[root@localhost ~]# vi /etc/hosts

192.168.84.128 hmaster
192.168.84.131 hslave1
192.168.84.132 hslave2

5. Disable the host firewall
# Stop the firewall
systemctl stop firewalld.service

# Prevent the firewall from starting at boot
systemctl disable firewalld.service
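
Optional check that it is really off:

systemctl status firewalld.service
firewall-cmd --state    # reports "not running" once firewalld is stopped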

6. Disable SELinux
vi /etc/sysconfig/selinux
Set the SELINUX value to disabled.
After a reboot, the getenforce command reports the SELinux status as Disabled.
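
A non-interactive sketch of the same change (/etc/sysconfig/selinux is normally a symlink to /etc/selinux/config; setenforce 0 only switches the running system to permissive mode, the Disabled state takes effect after a reboot):

sed -i 's/^SELINUX=.*/SELINUX=disabled/' /etc/selinux/config
setenforce 0
getenforce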


7. Install the JDK
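
A minimal sketch for this step, assuming a JDK 1.8.0_91 tarball has already been downloaded (the tarball filename is an assumption) and using the /usr/local/java path referenced below:

mkdir -p /usr/local/java
tar -zxvf jdk-8u91-linux-x64.tar.gz -C /usr/local/java    # tarball name is an assumption

# append to /etc/profile:
export JAVA_HOME=/usr/local/java/jdk1.8.0_91
export PATH=${JAVA_HOME}/bin:$PATH

source /etc/profile
java -version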

8. Install Hadoop

Planned roles per node:

master            slave1             slave2
NameNode          ResourceManager
DataNode          DataNode           DataNode
NodeManager       NodeManager        NodeManager
HistoryServer                        SecondaryNameNode

Switch to the hadoop user, cd to the /home/hadoop directory, and unpack the archive:
tar -zxvf hadoop-2.8.5.tar.gz

    Edit hadoop-env.sh, mapred-env.sh and yarn-env.sh under the unpacked etc/hadoop directory and set the JDK path:

export JAVA_HOME=/usr/local/java/jdk1.8.0_91/
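
One way to apply the same setting to all three files at once (a sketch; it appends the export line, which overrides any earlier value when the script is sourced):

cd /home/hadoop/hadoop-2.8.5/etc/hadoop
for f in hadoop-env.sh mapred-env.sh yarn-env.sh; do
  echo 'export JAVA_HOME=/usr/local/java/jdk1.8.0_91/' >> $f
done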


Edit core-site.xml
<configuration>
  <!-- NameNode host and HDFS port -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hmaster:9000</value>
  </property>
  <!-- Path of the tmp directory -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/hadoop/data/tmp</value>
  </property>
</configuration>

[hadoop@hmaster ~]$ mkdir -p data/tmp


Configure hdfs-site.xml
<property>
  <name>dfs.replication</name>
  <value>3</value>
</property>
<!-- commented out:
<property>
  <name>dfs.namenode.secondary.http-address</name>
  <value>hslave2:50090</value>
</property>
-->
<property>
  <name>dfs.namenode.name.dir</name>
  <value>/home/hadoop/data/dfs/namenode</value>
</property>
<property>
  <name>dfs.datanode.data.dir</name>
  <value>/home/hadoop/data/dfs/datanode</value>
</property>
<property>
  <name>dfs.permissions</name>
  <value>false</value>
  <description>need not permissions</description>
</property>
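
The NameNode and DataNode directories above are created on first format/start, but they can also be created up front (a sketch, run in the hadoop home directory on each node):

[hadoop@hmaster ~]$ mkdir -p data/dfs/namenode data/dfs/datanode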

Configure slaves
hmaster
hslave1
hslave2


Configure yarn-site.xml
<property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
</property>
<property>
    <name>yarn.resourcemanager.hostname</name>
    <value>hmaster</value>
</property>
<property>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
</property>
<property>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>106800</value>
</property>

Configure mapred-site.xml
[hadoop@hmaster hadoop]$ cp mapred-site.xml.template mapred-site.xml
<property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
</property>
<property>
    <name>mapreduce.jobhistory.address</name>
    <value>hmaster:10020</value>
</property>
<property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>hmaster:19888</value>
</property>

scp -r /home/hadoop/hadoop-2.8.5 hadoop@hslave1:/home/hadoop/
scp -r /home/hadoop/data hadoop@hslave1:/home/hadoop/

scp -r /home/hadoop/hadoop-2.8.5 hadoop@hslave2:/home/hadoop/
scp -r /home/hadoop/data hadoop@hslave2:/home/hadoop/

[hadoop@hslave1 hadoop-2.8.5]$ mkdir logs

9. Configure the Hadoop environment variables

su root
vi  /etc/profile

Add the following:

#hadoop
export HADOOP_HOME=/home/hadoop/hadoop-2.8.5
export PATH=${JAVA_HOME}/bin:${MYSQL_HOME}/bin:${HADOOP_HOME}/bin:${HADOOP_HOME}/sbin:$PATH

After saving, run source /etc/profile, then copy /etc/profile to the hslave1 and hslave2 hosts with scp.

scp /etc/profile hslave1:/etc/profile
scp /etc/profile hslave2:/etc/profile

Likewise, run source /etc/profile on hslave1 and hslave2 to reload the configuration.
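
A quick sanity check on each node after the profile has been sourced:

which hadoop
hadoop version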

10. Start the cluster

Format the NameNode (first start only, on hmaster):
$HADOOP_HOME/bin/hdfs namenode -format

Start HDFS
sbin/start-dfs.sh

 

Start YARN
ssh hslave1
$HADOOP_HOME/sbin/start-yarn.sh

 

$HADOOP_HOME/sbin/yarn-daemon.sh start resourcemanager


Start the JobHistory server
[hadoop@master ~]$ $HADOOP_HOME/sbin/mr-jobhistory-daemon.sh start historyserver
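
To confirm the daemons are up, jps can be run on each node and compared with the role plan in step 8 (exact placement depends on the configuration actually used, e.g. the commented-out SecondaryNameNode property):

[hadoop@hmaster ~]$ jps
[hadoop@hslave1 ~]$ jps
[hadoop@hslave2 ~]$ jps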

Shut down the cluster
stop-all.sh

or
stop-dfs.sh
stop-yarn.sh

View the HDFS web UI
The address is the IP of the host running the NameNode, port 50070: http://192.168.84.128:50070

View the YARN web UI
The address is the host running the ResourceManager, port 8088: http://192.168.84.128:8088


View the JobHistory web UI
The address is the IP of the host running the JobHistoryServer, port 19888 (as configured in mapred-site.xml above): http://192.168.84.128:19888

Troubleshooting: DataNodes fail to start after the NameNode has been reformatted

Option 1: stop the cluster, delete everything under the problem node's data directory (the dfs.datanode.data.dir directory configured in hdfs-site.xml), and reformat the NameNode.

Option 2: stop the cluster, then edit current/VERSION under the DataNode data directory (here /home/hadoop/data/dfs/datanode/current/VERSION) and change its clusterID to match the NameNode's.
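
A sketch for checking whether the clusterIDs actually differ (paths follow the dfs.*.dir settings configured above):

grep clusterID /home/hadoop/data/dfs/namenode/current/VERSION    # on hmaster
grep clusterID /home/hadoop/data/dfs/datanode/current/VERSION    # on the problem DataNode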

11. Test a job
11.1 Prepare the MapReduce input file wc.input

[hadoop@master ~]$ cat wc.input
hadoop mapreduce hive
hbase spark storm
sqoop hadoop hive
spark hadoop
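
If the file does not exist yet, it can be created with the same contents as above:

cat > wc.input <<'EOF'
hadoop mapreduce hive
hbase spark storm
sqoop hadoop hive
spark hadoop
EOF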


11.2 Create the input directory on HDFS

[hadoop@master ~]$ hdfs dfs -mkdir /input

11.3 Upload wc.input to HDFS

[hadoop@master ~]$ hdfs dfs -put wc.input /input

Check that the file arrived:
hdfs dfs -ls /input

11.4 Run the MapReduce demo bundled with Hadoop

[hadoop@master ~]$ yarn jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.8.5.jar wordcount /input/wc.input /output

Because the Hadoop environment variables are already configured in /etc/profile, the yarn command can be run directly.

11.5 View the output files

[hadoop@master ~]$ hdfs dfs -ls /output/

[hadoop@master ~]$ hdfs dfs -cat /output/part-r-00000
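
For the input above, the contents of part-r-00000 should come out roughly as follows (counted by hand from wc.input; keys are sorted and tab-separated):

hadoop      3
hbase       1
hive        2
mapreduce   1
spark       2
sqoop       1
storm       1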
