1. References
http://blog.leanote.com/post/du00/%E6%90%AD%E5%BB%BAHadoop-Spark%E7%9A%84%E5%8D%95%E6%9C%BA%E7%8E%AF%E5%A2%83
2. Install the JDK
[root@localhost /]# cd usr
[root@localhost usr]# mkdir java
[root@localhost usr]# cd java
[root@localhost java]# ls
hadoop-2.6.2.tar.gz jdk-8u65-linux-x64.tar.gz
[root@localhost java]# tar zxvf jdk-8u65-linux-x64.tar.gz
[root@localhost java]# ls
hadoop-2.6.2.tar.gz jdk1.8.0_65 jdk-8u65-linux-x64.tar.gz
[root@localhost java]# rm -rf jdk-8u65-linux-x64.tar.gz
Set the JDK environment variables. As the root user, switch to the /etc/profile.d/ directory:
[root@localhost java]# cd /etc/profile.d/
[root@localhost profile.d]# vi java.sh
Add:
JAVA_HOME=/usr/java/jdk1.8.0_65
CLASSPATH=$JAVA_HOME/lib:$CLASSPATH
PATH=$JAVA_HOME/bin:$PATH
export JAVA_HOME CLASSPATH PATH
After saving the file, reboot the system (or simply log out and back in so /etc/profile.d is re-read) and run java -version to check that the setup is correct.
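If the variables are picked up, java -version should print something similar to the following (the exact build numbers may differ):
[root@localhost ~]# java -version
java version "1.8.0_65"
Java(TM) SE Runtime Environment (build 1.8.0_65-b17)
Java HotSpot(TM) 64-Bit Server VM (build 25.65-b01, mixed mode)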
3. System Configuration
3.1 Set the hostname
[root@localhost java]# vi /etc/hosts
Add: 192.168.2.133 single.hadoop.com
[root@localhost java]# ping single.hadoop.com
Ping the new name to verify that it resolves correctly.
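The prompt above still shows localhost; if you also want the machine's own hostname to match (optional for this single-node setup), on CentOS 6 you can apply it immediately and make it persistent by editing /etc/sysconfig/network:
[root@localhost java]# hostname single.hadoop.com
[root@localhost java]# vi /etc/sysconfig/network //set HOSTNAME=single.hadoop.com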
3.2 Disable the firewall
[root@localhost java]# service iptables stop
iptables: Flushing firewall rules: [ OK ]
iptables: Setting chains to policy ACCEPT: filter [ OK ]
iptables: Unloading modules: [ OK ]
[root@localhost java]# chkconfig iptables off
3.3 Create the hadoop user and group
[root@localhost java]# groupadd hadoop //create the hadoop group
[root@localhost java]# useradd -g hadoop hadoop //create the hadoop user and add it to the hadoop group
[root@localhost java]# passwd hadoop //set its password
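As a quick sanity check, id should report the new account in the hadoop group (the numeric ids below are typical for a first ordinary account on CentOS 6 and may differ):
[root@localhost java]# id hadoop
uid=500(hadoop) gid=500(hadoop) groups=500(hadoop)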
4. Install Hadoop
4.1 Install
[root@localhost java]# tar zxvf hadoop-2.6.2.tar.gz //unpack
[root@localhost java]# chown -R hadoop:hadoop hadoop-2.6.2 //give the hadoop user ownership of hadoop-2.6.2
4.2 Create the data directories
[root@localhost java]# mkdir -p /data/dfs/name
[root@localhost java]# mkdir -p /data/dfs/data
[root@localhost java]# mkdir -p /data/tmp
[root@localhost java]# chown -R hadoop:hadoop /data //give the hadoop user ownership of /data
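Equivalently, the three mkdir calls above can be collapsed into one command with shell brace expansion:
[root@localhost java]# mkdir -p /data/dfs/{name,data} /data/tmp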
5. Configure Hadoop
5.1 Configure etc/hadoop/core-site.xml
[root@localhost java]# cd /usr/java/hadoop-2.6.2/etc/hadoop
<configuration>
    <property>
        <name>fs.defaultFS</name>
        <value>hdfs://single.hadoop.com:9000</value>
    </property>
    <property>
        <name>io.file.buffer.size</name>
        <value>131072</value>
    </property>
    <property>
        <name>hadoop.tmp.dir</name>
        <value>file:/data/tmp</value>
        <description>A base for other temporary directories.</description>
    </property>
    <property>
        <name>hadoop.proxyuser.hadoop.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.hadoop.groups</name>
        <value>*</value>
    </property>
</configuration>
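Later, once the hadoop user has the PATH set up (section 5.6) , you can sanity-check that Hadoop actually reads this value with getconf:
[hadoop@single hadoop-2.6.2]$ bin/hdfs getconf -confKey fs.defaultFS
hdfs://single.hadoop.com:9000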
5.2 Configure etc/hadoop/hdfs-site.xml
<configuration>
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/data/dfs/name</value>
        <description>Determines where on the local filesystem the DFS name node should store the name table. If this is a comma-delimited list of directories then the name table is replicated in all of the directories, for redundancy.</description>
        <final>true</final>
    </property>
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/data/dfs/data</value>
        <description>Determines where on the local filesystem a DFS data node should store its blocks. If this is a comma-delimited list of directories, then data will be stored in all named directories, typically on different devices. Directories that do not exist are ignored.</description>
        <final>true</final>
    </property>
    <property>
        <name>dfs.replication</name>
        <value>1</value>
    </property>
    <property>
        <name>dfs.permissions</name>
        <value>false</value>
    </property>
</configuration>
5.3 Configure mapred-site.xml
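In the stock 2.6.2 tarball this file only ships as a template, so copy it first:
[root@localhost hadoop]# cp mapred-site.xml.template mapred-site.xml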
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
</configuration>
5.4 Configure yarn-site.xml
<configuration>
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
</configuration>
5.5 Configure slaves
Add: single.hadoop.com
5.6 Set the Hadoop environment variables
As root, create the file /etc/profile.d/hadoop.sh:
[root@localhost hadoop]# vi /etc/profile.d/hadoop.sh
HADOOP_HOME=/usr/java/hadoop-2.6.2
PATH=$HADOOP_HOME/bin:$PATH
export HADOOP_HOME PATH
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
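After re-logging in (or sourcing the script), hadoop should resolve from the new PATH; the first line of hadoop version should read roughly:
[root@localhost ~]# source /etc/profile.d/hadoop.sh
[root@localhost ~]# hadoop version
Hadoop 2.6.2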
6. Passwordless SSH Setup
[root@localhost ~]# su hadoop //switch to the hadoop user
[hadoop@localhost root]$ cd //cd with no argument jumps to the home directory, /home/hadoop
[hadoop@localhost ~]$ mkdir .ssh //create the .ssh directory
[hadoop@localhost ~]$ ssh-keygen -t rsa //press Enter at every prompt
[hadoop@localhost ~]$ cd .ssh //enter the .ssh directory
[hadoop@localhost .ssh]$ cp id_rsa.pub authorized_keys //copy the generated id_rsa.pub to authorized_keys
[hadoop@localhost .ssh]$ cd .. //back to the home directory
[hadoop@localhost ~]$ chmod 700 .ssh //the .ssh directory must be mode 700
[hadoop@localhost ~]$ chmod 600 .ssh/* //and the files inside it mode 600
[hadoop@localhost ~]$ ssh single.hadoop.com //the first connection asks you to confirm the host key; once logging in works without a password, the setup succeeded
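Note: the cp above works because authorized_keys does not exist yet; the appending form is the one that also works when adding keys from other hosts later:
[hadoop@localhost .ssh]$ cat id_rsa.pub >> authorized_keys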
7. Run Hadoop
[hadoop@single hadoop-2.6.2]$ bin/hadoop namenode -format
[hadoop@single hadoop-2.6.2]$ sbin/start-all.sh
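Both commands above are deprecated in Hadoop 2.x (they still work, printing a warning); the current equivalents are:
[hadoop@single hadoop-2.6.2]$ bin/hdfs namenode -format
[hadoop@single hadoop-2.6.2]$ sbin/start-dfs.sh
[hadoop@single hadoop-2.6.2]$ sbin/start-yarn.sh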
[hadoop@single hadoop-2.6.2]$ jps
4296 DataNode
4584 ResourceManager
4681 NodeManager
4445 SecondaryNameNode
4174 NameNode
4895 Jps
Open http://192.168.2.133:50070 for the HDFS web UI,
and http://192.168.2.133:8088 for the YARN web UI (job status).
8. Test
Create a directory in HDFS:
[hadoop@single hadoop-2.6.2]$ bin/hadoop fs -mkdir /test
Create a text file test.txt under /home/hadoop, then put it into HDFS:
[hadoop@single hadoop-2.6.2]$ bin/hadoop fs -put /home/hadoop/test.txt /test
[hadoop@single hadoop-2.6.2]$ bin/hadoop fs -ls /test
Run the wordcount example:
[hadoop@single hadoop-2.6.2]$ bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.2.jar wordcount /test/test.txt /test/out
View the result:
[hadoop@single hadoop-2.6.2]$ bin/hadoop fs -text /test/out/part-r-00000
The word counts are printed once the command finishes.
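For example, if test.txt contained the line "hello hadoop hello" (a made-up sample), the output would be the counts, tab-separated:
hadoop	1
hello	2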
Error:
INFO client.RMProxy: Connecting to ResourceManager at /0.0.0.0:8032
INFO ipc.Client: Retrying connect to server: 0.0.0.0/0.0.0.0:8032. Already tried 0 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1 SECONDS)
Solution:
The problem connecting to the ResourceManager was that a few properties had to be added to yarn-site.xml:
<property>
    <name>yarn.resourcemanager.address</name>
    <value>127.0.0.1:8032</value>
</property>
<property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>127.0.0.1:8030</value>
</property>
<property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>127.0.0.1:8031</value>
</property>
With these properties in place, the connection succeeds and the job runs.
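Remember that YARN only reads yarn-site.xml at startup, so restart it after the change:
[hadoop@single hadoop-2.6.2]$ sbin/stop-yarn.sh
[hadoop@single hadoop-2.6.2]$ sbin/start-yarn.sh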