1、环境和安装包
一台虚拟机,2核4G
经过编译后的hadoop包:hadoop-2.6.0-cdh5.7.0.tar.gz,主要是支持压缩,为后面学习hive压缩等做准备
编译过程可参考我的博客:https://blog.csdn.net/greenplum_xiaofan/article/details/95466703
jdk-8u45-linux-x64.gz
2、添加用户和上传软件
[root@vm01 ~]# useradd hadoop
[root@vm01 ~]# su - hadoop
[hadoop@vm01 ~]$ mkdir app data software lib source
[hadoop@vm01 ~]$ cd software
[hadoop@vm01 software]$ rz
[hadoop@vm01 software]$ ll
-rw-rw-r--. 1 hadoop hadoop 192519341 Jul 10 03:32 hadoop-2.6.0-cdh5.7.0.tar.gz
-rw-r--r--. 1 hadoop hadoop 173271626 Jul 10 04:41 jdk-8u45-linux-x64.gz
3、安装JDK
先清除自带的openjdk
[root@vm01 ~]# java -version #查看系统是否已经默认安装了jdk
[root@vm01 ~]# rpm -qa | grep jdk #查询具体安装哪些jdk软件
[root@vm01 ~]# yum -y remove XXX #将安装的jdk卸载
[root@vm01 ~]# mkdir /usr/java # java安装目录
[root@vm01 ~]# mkdir /usr/share/java #部署CDH需要mysql jdbc jar包
[root@vm01 ~]# tar -zxvf /home/hadoop/software/jdk-8u45-linux-x64.gz -C /usr/java/ #解压
[root@vm01 ~]# cd /usr/java
[root@vm01 ~]# chown -R root:root jdk1.8.0_45
[root@vm01 ~]# vi /etc/profile #编辑环境变量文件,追加如下内容:
export JAVA_HOME=/usr/java/jdk1.8.0_45
export JRE_HOME=$JAVA_HOME/jre
export CLASSPATH=.:$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH
export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH
[root@vm01 ~]# source /etc/profile #更新环境变量
[root@vm01 ~]# java -version
java version "1.8.0_45"
Java(TM) SE Runtime Environment (build 1.8.0_45-b14)
Java HotSpot(TM) 64-Bit Server VM (build 25.45-b02, mixed mode)
4、配置ssh免密码互信
[root@vm01 ~]# su - hadoop
[hadoop@vm01 ~]$ ssh-keygen #生成~/.ssh文件夹
[hadoop@vm01 ~]$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys #将公钥放到一个文件夹中
[hadoop@vm01 ~]$ cd .ssh
[hadoop@vm01 .ssh]$ chmod 600 authorized_keys
[hadoop@vm01 .ssh]$ ssh vm01 date #第一次需要yes确认,vm01需要配置hosts文件
5、Hadoop安装
[hadoop@vm01 ~]$ tar -zxvf ~/software/hadoop-2.6.0-cdh5.7.0.tar.gz -C ~/app/
[hadoop@vm01 ~]$ cd app/hadoop-2.6.0-cdh5.7.0/etc/hadoop #配置文件都在这目录下
[hadoop@vm01 hadoop]$ pwd
/home/hadoop/app/hadoop-2.6.0-cdh5.7.0/etc/hadoop
配置hadoop-env.sh
文件
[hadoop@vm01 hadoop]$ vi hadoop-env.sh
export JAVA_HOME=/usr/java/jdk1.8.0_45
export HADOOP_PREFIX=/home/hadoop/app/hadoop-2.6.0-cdh5.7.0
配置core-site.xml
文件
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://vm01:9000</value>
</property>
</configuration>
配置hdfs-site.xml
文件
<configuration>
<property>
<name>dfs.namenode.name.dir</name>
<value>/home/hadoop/app/hadoop-2.6.0-cdh5.7.0/data/dfs/name</value>
</property>
<property>
<name>dfs.namenode.edits.dir</name>
<value>${dfs.namenode.name.dir}</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/home/hadoop/app/hadoop-2.6.0-cdh5.7.0/data/dfs/data</value>
</property>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>vm01:50090</value>
</property>
<property>
<name>dfs.namenode.secondary.https-address</name>
<value>vm01:50091</value>
</property>
</configuration>
配置mapred-site.xml
文件
注意:需要先复制一份并重命名为mapred-site.xml
[hadoop@vm01 hadoop]$ cp mapred-site.xml.template mapred-site.xml
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
配置yarn-site.xml
文件
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
6、启动和测试
格式化namenode
[hadoop@vm01 sbin]$ hdfs namenode -format
[hadoop@vm01 sbin]$ pwd #启动的脚本都放在sbin目录下
/home/hadoop/app/hadoop-2.6.0-cdh5.7.0/sbin
[hadoop@vm01 sbin]$ ./start-dfs.sh
[hadoop@vm01 sbin]$ ./start-yarn.sh
[hadoop@vm01 sbin]$ jps
32240 SecondaryNameNode
32944 NodeManager
32019 NameNode
31547 DataNode
32670 ResourceManager
33742 Jps
Web登陆:vm01:50070