(1) Disable the firewall and SELinux; configure hostname resolution
/etc/init.d/iptables stop
chkconfig iptables off
[root@hadoop home]# cat /etc/sysconfig/selinux | grep disabled
# disabled - No SELinux policy is loaded.
SELINUX=disabled
[root@hadoop ~]# cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.1.252 hadoop
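A quick check that the name resolves before going further (a sketch; the hostname here is hadoop):
ping -c 1 hadoop    # should answer from 192.168.1.252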
(2) Create the group and user, grant sudo privileges, and set up passwordless SSH
groupadd hadoop
useradd hadoop -g hadoop
passwd hadoop
[root@hadoop ~]# cat /etc/sudoers |grep hadoop
hadoop ALL=(ALL) NOPASSWD:ALL,/usr/sbin/passwd root
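Edit /etc/sudoers with visudo rather than a plain editor, since visudo validates the syntax before saving; an already-edited file can be checked with:
visudo -c    # prints 'parsed OK' when the sudoers file is valid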
ssh-keygen -t rsa -f ~/.ssh/id_rsa -P ''
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
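A quick check that passwordless login works for the user that owns the keys (assumed here to be hadoop):
ssh hadoop@hadoop hostname    # should print 'hadoop' without a password prompt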
(3) Install the JDK and configure the Java environment variables
mkdir -p /usr/java
tar xf /root/jdk-7u75-linux-x64.gz -C /usr/java
[root@hadoop ~]# cat /etc/profile| grep JAVA
export JAVA_HOME=/usr/java/jdk1.7.0_75
export JRE_HOME=/usr/java/jdk1.7.0_75/jre
export CLASSPATH=$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH
export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH
source /etc/profile
[root@hadoop ~]# java -version
java version "1.7.0_75"
Java(TM) SE Runtime Environment (build 1.7.0_75-b13)
Java HotSpot(TM) 64-Bit Server VM (build 24.75-b04, mixed mode)
(4) Install and configure Hadoop
Upload the installation media to the hadoop user's home directory and extract it under /usr/local/hadoop, then set the hadoop user's environment variables (HADOOP_HOME below points at /home/hadoop/app/hadoop; make that path point at the extracted hadoop-2.7.3 tree, e.g. with a symlink).
su - hadoop
mkdir app
mkdir -p /home/hadoop/app/hadoop/hdfs/data
mkdir -p /home/hadoop/app/hadoop/hdfs/name
mkdir -p /home/hadoop/app/hadoop/data/tmp
[hadoop@hadoop ~]$ cat ~hadoop/.bash_profile |grep HADOOP    # Hadoop environment variables; they could also go in /etc/profile, but here they are set per user
export HADOOP_HOME=/home/hadoop/app/hadoop
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
If multiple users need Hadoop, add the variables to /etc/profile instead, for example:
[hadoop@hadoop hadoop]$ cat /etc/profile | grep HOME
#############java_home#########
export JAVA_HOME=/usr/java/jdk1.7.0_75
export JRE_HOME=/usr/java/jdk1.7.0_75/jre
export PATH=$JAVA_HOME/bin:$JRE_HOME/bin:$PATH
export CLASSPATH=$JAVA_HOME/lib:$JRE_HOME/lib:$CLASSPATH
export PATH=/usr/local/mysql/bin:$PATH
#############hadoop_home###########
export HADOOP_HOME=/home/hadoop/app/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
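After sourcing the profile, a quick sanity check that the hadoop command resolves:
source /etc/profile
hadoop version    # should report Hadoop 2.7.3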
(5) Set the JAVA_HOME path explicitly in hadoop-env.sh, mapred-env.sh, and yarn-env.sh
[hadoop@hadoop hadoop]$ cat hadoop-env.sh |grep JAVA_HOME
export JAVA_HOME=/usr/java/jdk1.7.0_75
[hadoop@hadoop hadoop]$ cat mapred-env.sh |grep JAVA_HOME
export JAVA_HOME=/usr/java/jdk1.7.0_75
[hadoop@hadoop hadoop]$ cat yarn-env.sh |grep JAVA_HOME
export JAVA_HOME=/usr/java/jdk1.7.0_75
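All three files can be updated in one pass with sed (a sketch, run from $HADOOP_HOME/etc/hadoop; it assumes the files still contain their stock 'export JAVA_HOME' lines, possibly commented out):
for f in hadoop-env.sh mapred-env.sh yarn-env.sh; do
  sed -i 's|^#\{0,1\} *export JAVA_HOME=.*|export JAVA_HOME=/usr/java/jdk1.7.0_75|' "$f"
done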
==================================
(6) Configure core-site.xml, hdfs-site.xml, mapred-site.xml, yarn-site.xml, and slaves
1) Edit core-site.xml: set the default filesystem (the NameNode host and its RPC port) and Hadoop's temporary-data directory.
<property>
<name>fs.defaultFS</name>
<value>hdfs://node1.oracle.com:8020</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/hadoop/app/hadoop/data/tmp</value>
</property>
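Note that in each of these files the <property> blocks go inside the file's root <configuration> element, e.g.:
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://node1.oracle.com:8020</value>
    </property>
</configuration>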
2) Edit hdfs-site.xml: set the HDFS replication factor; since this is a single node, it must be 1.
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>node1.oracle.com:50090</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/home/hadoop/app/hadoop/hdfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/home/hadoop/app/hadoop/hdfs/data</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>true</value>
</property>
Again, because this is a pseudo-distributed (single-node) environment, the replication factor can only be 1.
3) Edit mapred-site.xml: run MapReduce on YARN, and configure the JobHistory server.
mv mapred-site.xml.template mapred-site.xml
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop-spark:10020</value>
<description>MapReduce JobHistory Server IPC host:port</description>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop-spark:19888</value>
</property>
<property>
<name>mapreduce.jobhistory.done-dir</name>
<value>/history/done</value>
</property>
<property>
<name>mapreduce.jobhistory.intermediate-done-dir</name>
<value>/history/done_intermediate</value>
</property>
<property>
<name>yarn.app.mapreduce.am.staging-dir</name>
<value>/history/hadoop-yarn/staging</value>
</property>
To check job status from a Windows machine, add the following entry to C:\Windows\System32\drivers\etc\hosts:
192.168.137.253 hadoop-spark
On Linux no change is needed.
The following directories must also be created in HDFS (run these commands after HDFS has been formatted and started; see steps (7) and (8) below):
hdfs dfs -mkdir -p /history/done
hdfs dfs -mkdir -p /history/done_intermediate
hdfs dfs -mkdir -p /history/hadoop-yarn/staging
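To confirm they were created:
hdfs dfs -ls -R /history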
4) Edit yarn-site.xml: set the ResourceManager host [configurable], enable the mapreduce_shuffle auxiliary service, and turn on log aggregation (logs retained for 106800 seconds, roughly 30 hours).
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.resourcemanager.hostname</name>
<value>node1.oracle.com</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>106800</value>
</property>
5) Edit the slaves file; it lists the hosts that run the DataNode/NodeManager daemons. For this single-node setup it contains just the one hostname:
[hadoop@hadoop hadoop]$ cat slaves
hadoop
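The file can also be written in one step:
echo hadoop > $HADOOP_HOME/etc/hadoop/slaves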
====================== Format the NameNode and verify ======================
(7) Format the NameNode (this initializes its metadata storage)
hdfs namenode -format    # the older 'hadoop namenode -format' form still works but is deprecated
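A successful format prints a '... has been successfully formatted.' message and populates the name directory; a quick check:
ls /home/hadoop/app/hadoop/hdfs/name/current/    # should contain a VERSION file and an fsimage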
(8) Start Hadoop
Start HDFS first:
[hadoop@hadoop hadoop-2.7.3]$ pwd
/usr/local/hadoop/hadoop-2.7.3
sbin/start-dfs.sh
Then start YARN:
sbin/start-yarn.sh
mr-jobhistory-daemon.sh start historyserver    # start the JobHistory server
Alternatively, the daemons can be started one at a time:
hadoop-daemon.sh start namenode
hadoop-daemon.sh start datanode
yarn-daemon.sh start resourcemanager
yarn-daemon.sh start nodemanager
mr-jobhistory-daemon.sh start historyserver
[hadoop@hadoop hadoop-2.7.3]$ jps    # check that all daemons are running
32656 NameNode
33548 Jps
32754 DataNode
33096 ResourceManager
33195 NodeManager
32941 SecondaryNameNode
http://192.168.1.252:50070/dfshealth.html#tab-overview    # HDFS NameNode web UI
http://192.168.1.252:8088/cluster    # YARN ResourceManager web UI
==== Test HDFS ====
[hadoop@hadoop hadoop-2.7.3]$ bin/hdfs dfs    # show the command reference
hdfs dfs -mkdir -p conf/temp    # relative paths resolve under /user/hadoop
hdfs dfs -mkdir /temp
hdfs dfs -put $HADOOP_HOME/etc/hadoop/core-site.xml /user/hadoop/conf/temp    # upload a file to the given directory
hdfs dfs -text /user/hadoop/conf/temp/core-site.xml    # print the file's contents
hdfs dfs -get /user/hadoop/conf/temp/core-site.xml /home/hadoop/    # download to a local directory
hdfs dfs -rm /user/hadoop/conf/temp/core-site.xml
Test wordcount
[hadoop@hadoop hadoop-2.7.3]$ hdfs dfs -put /home/hadoop/wc.input /user/hadoop/conf/temp/
hadoop jar hadoop-mapreduce-examples-2.7.3.jar wordcount /user/hadoop/conf/temp/wc.input /user/hadoop/conf/temp/output    # run from $HADOOP_HOME/share/hadoop/mapreduce
hadoop jar hadoop-mapreduce-examples-2.7.3.jar wordcount /user/hadoop/conf/temp/core-site.xml /user/hadoop/conf/temp/output2
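Each run must write to a directory that does not yet exist; to rerun a job, delete the old output first:
hdfs dfs -rm -r /user/hadoop/conf/temp/output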
[hadoop@hadoop mapreduce]$ hdfs dfs -text /user/hadoop/conf/temp/output/part*
DB2 1
are 1
five 2
four 1
hello 3
jack 1
mysql 1
nine 2
one 1
oracle 2
same 1
seven 1
six 4
ten 2
the 2
three 2
two 2
we 1
world 3