目录
6.hadoop02,hadoop03重复3-5的步骤配置就可以
2.关闭nodemanager 、resourcemanager和historyserver
3. 启动nodemanager 、resourcemanager和historyserver
hadoop安装
在这里hadoop-2.7.3.tar.gz,官网下载
1.安装hadoop,配置环境变量
1.上传hadoop-2.7.3.tar.gz到指定位置
[root@hadoop01 software]# ls
hadoop-2.7.3.tar.gz
2.解压文件
[root@hadoop01 software]# tar -zxvf hadoop-2.7.3.tar.gz -C /opt/module/
3.配置hadoop-env.sh
按Esc进入命令模式后输入:set nu显示行号
打开hadoop-env.sh修改jdk路径
[root@hadoop01 ~]# cd /opt/module/hadoop-2.7.3/etc/hadoop
[root@hadoop01 hadoop]# vim hadoop-env.sh
25 export JAVA_HOME=/opt/module/jdk1.8.0_144
4.添加hadoop的路径
[root@hadoop01 hadoop]# vim /etc/profile #最后一行添加
export HADOOP_HOME=/opt/module/hadoop-2.7.3
export PATH=$PATH:$HADOOP_HOME/bin
export PATH=$PATH:$HADOOP_HOME/sbin
5.修改让文件生效
[root@hadoop01 software]# source /etc/profile
6.hadoop02,hadoop03重复3-5的步骤配置就可以
2.ssh免密码登录
1.hadoop01生成公钥和私钥
[root@hadoop01 ~]# cd .ssh
[root@hadoop01 .ssh]# pwd
/root/.ssh
[root@hadoop01 .ssh]# ssh-keygen -t rsa
[root@hadoop01 .ssh]# ssh-copy-id hadoop01 #yes回车,输入密码
[root@hadoop01 .ssh]# ssh-copy-id hadoop02
[root@hadoop01 .ssh]# ssh-copy-id hadoop03
[root@hadoop01 .ssh]# ssh-copy-id localhost
2.hadoop02,hadoop03重复步骤配置就可以
3.编写集群同步脚本
1.创建目录
[root@hadoop01 ~]# mkdir bin
2.创建文件
[root@hadoop01 bin]# touch xsync
3.编写集群同步脚本
[root@hadoop01 bin]# vim xsync
#!/bin/bash
# xsync — distribute a file or directory to every cluster node
# (hadoop01..hadoop03) with rsync, preserving its absolute path.
# Usage: xsync <path>

#1 Number of input arguments; bail out with a usage error if none given.
if (( $# == 0 )); then
  echo "no args" >&2
  exit 1   # non-zero: the bare 'exit' in the original returned echo's status (0)
fi

#2 Base name of the file/directory to sync.
p1=$1
fname=$(basename "$p1")
echo "fname=$fname"

#3 Absolute path of the parent directory (-P resolves symlinks).
pdir=$(cd -P "$(dirname "$p1")" && pwd)
echo "pdir=$pdir"

#4 Current user name, used for the remote login.
user=$(whoami)

#5 Loop over the three nodes and push the data to the same path.
for (( host = 1; host < 4; host++ )); do
  echo "--------------- hadoop0$host ----------------"
  # Quote all expansions so paths containing spaces survive.
  rsync -rvl "$pdir/$fname" "$user@hadoop0$host:$pdir"
done
4.文件加上权限
[root@hadoop01 bin]# chmod 777 xsync
5.同步目录
[root@hadoop01 bin]# /root/bin/xsync /root/bin
4.配置hdfs集群
1.配置core-site.xml
[root@hadoop01 hadoop]# vim core-site.xml
<configuration>
<!-- 指定HDFS中NameNode的地址 -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://hadoop01:9000</value>
</property>
<!-- 指定hadoop运行时产生文件的存储目录 -->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/module/hadoop-2.7.3/data/tmp</value>
</property>
</configuration>
~
2.配置hdfs-site.xml
[root@hadoop01 hadoop]# vim hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>hadoop01:50090</value>
</property>
</configuration>
3.配置slaves
[root@hadoop01 hadoop]# vim slaves
hadoop01
hadoop02
hadoop03
5.配置yarn集群
1.配置yarn-env.sh
[root@hadoop01 hadoop]# vim yarn-env.sh
23 export JAVA_HOME=/opt/module/jdk1.8.0_144
2.配置yarn-site.xml
[root@hadoop01 hadoop]# vim yarn-site.xml
<configuration>
<!-- reducer获取数据的方式 -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- 指定YARN的ResourceManager的地址 -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hadoop01</value>
</property>
<!-- 日志聚集功能使能 -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- 日志保留时间设置7天 -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
</configuration>
3.配置mapred-env.sh
[root@hadoop01 hadoop]# vim mapred-env.sh
export JAVA_HOME=/opt/module/jdk1.8.0_144
export HADOOP_JOB_HISTORYSERVER_HEAPSIZE=1000
export HADOOP_MAPRED_ROOT_LOGGER=INFO,RFA
4.配置mapred-site.xml
[root@hadoop01 hadoop]# mv mapred-site.xml.template mapred-site.xml #修改名字
[root@hadoop01 hadoop]# vim mapred-site.xml
<configuration>
<!-- 指定mr运行在yarn上 -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop01:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop01:19888</value>
</property>
</configuration>
5.同步集群到hadoop02,hadoop03
[root@hadoop01 hadoop]# /root/bin/xsync /opt/module/hadoop-2.7.3/
6.启动集群测试
1.第一次启动集群,格式化namenode
[root@hadoop01 hadoop-2.7.3]# bin/hdfs namenode -format
2.启动进程
[root@hadoop01 hadoop-2.7.3]# sbin/start-dfs.sh
3.查看进程
[root@hadoop01 hadoop-2.7.3]# jps
10496 Jps
28469 SecondaryNameNode
28189 NameNode
28286 DataNode
[root@hadoop02 ~]# jps
27242 Jps
3614 DataNode
[root@hadoop03 ~]# jps
27242 Jps
3614 DataNode
4.访问端口号50070
5.启动yarn集群
[root@hadoop01 hadoop-2.7.3]# sbin/start-yarn.sh
6.查看yarn进程
[root@hadoop01 hadoop-2.7.3]# jps
49155 NodeManager
28469 SecondaryNameNode
48917 ResourceManager
10600 Jps
28189 NameNode
28286 DataNode
[root@hadoop02 ~]# jps
3736 NodeManager
27242 Jps
3614 DataNode
[root@hadoop03 ~]# jps
3736 NodeManager
27242 Jps
3614 DataNode
7.查看yarn端口号8088
7.配置本地映射关系
1.在Windows找到hosts文件
2.进行修改,添加ip和主机名
# Copyright (c) 1993-2009 Microsoft Corp.
#
# This is a sample HOSTS file used by Microsoft TCP/IP for Windows.
#
# This file contains the mappings of IP addresses to host names. Each
# entry should be kept on an individual line. The IP address should
# be placed in the first column followed by the corresponding host name.
# The IP address and the host name should be separated by at least one
# space.
#
# Additionally, comments (such as these) may be inserted on individual
# lines or following the machine name denoted by a '#' symbol.
#
# For example:
#
# 102.54.94.97 rhino.acme.com # source server
# 38.25.63.10 x.acme.com # x client host
# localhost name resolution is handled within DNS itself.
# 127.0.0.1 localhost
# ::1 localhost
# 最后一行添加
192.168.86.101 hadoop01
192.168.86.102 hadoop02
192.168.86.103 hadoop03
8.配置本地yum源
1.创建目录
[root@hadoop01 ~]# mkdir /mnt/cdrom
[root@hadoop01 ~]# cd /mnt
[root@hadoop01 mnt]# ll
total 4
dr-xr-xr-x. 7 root root 4096 May 23 2016 cdrom
2.挂载光驱
[root@hadoop01 mnt]# mount -t auto /dev/cdrom /mnt/cdrom
[root@hadoop01 mnt]# cd /etc/yum.repos.d/
[root@hadoop01 yum.repos.d]# mkdir bak
[root@hadoop01 yum.repos.d]# mv CentOS-* bak
3.创建配置CentOS-DVD.repo
[root@hadoop01 yum.repos.d]# touch CentOS-DVD.repo
[root@hadoop01 yum.repos.d]# vim CentOS-DVD.repo
[centos6-dvd]
name=Welcome to local source yum
baseurl=file:///mnt/cdrom
enabled=1
gpgcheck=0
4.加载yum源
[root@hadoop01 yum.repos.d]# yum clean all
[root@hadoop01 yum.repos.d]# yum repolist all
9.配置hadoop历史日志
1.配置历史服务器
1. 配置mapred-site.xml
[root@hadoop01 hadoop]# vim mapred-site.xml
<configuration>
<!-- 指定mr运行在yarn上 -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop01:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop01:19888</value>
</property>
</configuration>
2. 查看启动历史服务器文件目录
[root@hadoop01 hadoop-2.7.3]# ls sbin/ | grep mr
mr-jobhistory-daemon.sh
3.启动历史服务器
[root@hadoop01 hadoop-2.7.3]$ sbin/mr-jobhistory-daemon.sh start historyserver
4.查看历史服务器是否启动
[root@hadoop01 hadoop-2.7.3]$ jps
5.查看jobhistory,端口号19888
http://hadoop01:19888/jobhistory
2.配置日志的聚集
1.配置yarn-site.xml
[root@hadoop01 hadoop]# vim yarn-site.xml
<configuration>
<!-- reducer获取数据的方式 -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- 指定YARN的ResourceManager的地址 -->
<property>
<name>yarn.resourcemanager.hostname</name>
<value>hadoop01</value>
</property>
<!-- 日志聚集功能使能 -->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!-- 日志保留时间设置7天 -->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>604800</value>
</property>
</configuration>
2.关闭nodemanager 、resourcemanager和historyserver
[root@hadoop01 hadoop-2.7.3]$ sbin/yarn-daemon.sh stop resourcemanager
[root@hadoop01 hadoop-2.7.3]$ sbin/yarn-daemon.sh stop nodemanager
[root@hadoop01 hadoop-2.7.3]$ sbin/mr-jobhistory-daemon.sh stop historyserver
3. 启动nodemanager 、resourcemanager和historyserver
[root@hadoop01 hadoop-2.7.3]$ sbin/yarn-daemon.sh start resourcemanager
[root@hadoop01 hadoop-2.7.3]$ sbin/yarn-daemon.sh start nodemanager
[root@hadoop01 hadoop-2.7.3]$ sbin/mr-jobhistory-daemon.sh start historyserver
4.删除hdfs上已经存在的hdfs文件
[root@hadoop01 hadoop-2.7.3]$ bin/hdfs dfs -rm -R /user/root/output
5.执行wordcount程序
[root@hadoop01 hadoop-2.7.3]$ hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar wordcount /user/root/input /user/root/output