Hadoop实现了一个分布式文件系统(Hadoop Distributed File System),简称HDFS。
HDFS具有高容错性的特点,并且被设计用来部署在低廉的(low-cost)硬件上;它还提供高吞吐量(high throughput)的数据访问,适合那些有着超大数据集(large data set)的应用程序。HDFS放宽了(relax)POSIX的要求,可以以流的形式访问(streaming access)文件系统中的数据。
Hadoop框架最核心的设计就是:HDFS和MapReduce。HDFS为海量的数据提供了存储,而MapReduce则为海量的数据提供了计算。
实验环境:
操作系统:redhat 6.5,iptables和selinux均为关闭(off)状态
hadoop-2.7.3版本,jdk 7版本(jdk-7u79)
Hadoop安装及java环境搭建
解压tar包
[root@server1 mnt]# useradd -u 800 hadoop
[root@server1 mnt]# id hadoop
uid=800(hadoop) gid=800(hadoop) groups=800(hadoop)
[root@server1 mnt]# mv hadoop-2.7.3.tar.gz jdk-7u79-linux-x64.tar.gz /home/hadoop/
[root@server1 mnt]# cd /home/hadoop/
[root@server1 hadoop]# su hadoop
[hadoop@server1 ~]$ ls
hadoop-2.7.3.tar.gz jdk-7u79-linux-x64.tar.gz
[hadoop@server1 ~]$ tar zxf jdk-7u79-linux-x64.tar.gz
[hadoop@server1 ~]$ ln -s jdk1.7.0_79/ java
[hadoop@server1 ~]$ vim .bash_profile
PATH=$PATH:$HOME/bin:~/java/bin
[hadoop@server1 ~]$ source .bash_profile
[hadoop@server1 ~]$ tar zxf hadoop-2.7.3.tar.gz
[hadoop@server1 ~]$ ln -s hadoop-2.7.3 hadoop
[hadoop@server1 ~]$ cd hadoop
[hadoop@server1 hadoop]$ cd etc/hadoop/
[hadoop@server1 hadoop]$ vim hadoop-env.sh
export JAVA_HOME=/home/hadoop/java
[hadoop@server1 hadoop]$ cd ..
[hadoop@server1 etc]$ ls
hadoop
[hadoop@server1 etc]$ cd ..
[hadoop@server1 hadoop]$ ls
bin etc include lib libexec LICENSE.txt NOTICE.txt README.txt sbin share
[hadoop@server1 hadoop]$ mkdir input
[hadoop@server1 hadoop]$ cp etc/hadoop/*.xml input/
[hadoop@server1 hadoop]$ bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar grep input output 'dfs[a-z.]+'
[hadoop@server1 hadoop]$ cd output/
[hadoop@server1 output]$ ls
part-r-00000 _SUCCESS
[hadoop@server1 output]$ cat *
1 dfsadmin
[hadoop@server1 hadoop]$ vim core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
</configuration>
[hadoop@server1 hadoop]$ vim hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>1</value>
</property>
</configuration>
[hadoop@server1 hadoop]$ vim slaves
172.25.20.1
[hadoop@server1 hadoop]$ ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
[hadoop@server1 hadoop]$ cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
[hadoop@server1 hadoop]$ chmod 0600 ~/.ssh/authorized_keys
[hadoop@server1 hadoop]$ bin/hdfs namenode -format
[hadoop@server1 hadoop]$ sbin/start-dfs.sh
[hadoop@server1 hadoop]$ bin/hdfs dfs -mkdir /user
[hadoop@server1 hadoop]$ bin/hdfs dfs -mkdir /user/hadoop
[hadoop@server1 hadoop]$ bin/hdfs dfs -ls
[hadoop@server1 hadoop]$ bin/hdfs dfs -put input/
Server1:
[hadoop@server1 hadoop]$ sbin/stop-dfs.sh
Stopping namenodes on [server1]
server1: stopping namenode
172.25.20.1: stopping datanode
Stopping secondary namenodes [0.0.0.0]
0.0.0.0: stopping secondarynamenode
[root@server1 hadoop]# yum install nfs-utils -y
[root@server1 hadoop]# /etc/init.d/rpcbind start
Starting rpcbind: [ OK ]
[root@server1 hadoop]# vim /etc/exports
/home/hadoop *(rw,anonuid=800,anongid=800)
[root@server1 hadoop]# /etc/init.d/nfs start
[root@server1 hadoop]# showmount -e
Export list for server1:
/home/hadoop *
[root@server1 hadoop]# exportfs -v
/home/hadoop <world>(rw,wdelay,root_squash,no_subtree_check,anonuid=800,anongid=800)
[root@server1 hadoop]# su - hadoop
Server2,server3:
[root@server2 ~]# yum install -y nfs-utils
[root@server2 ~]# /etc/init.d/rpcbind start
Starting rpcbind: [ OK ]
[root@server2 ~]# useradd -u 800 hadoop
[root@server2 ~]# mount 172.25.20.1:/home/hadoop /home/hadoop/
[root@server2 ~]# su - hadoop
Server1:
[hadoop@server1 ~]$ ssh 172.25.20.2
[hadoop@server1 ~]$ ssh 172.25.20.3
[hadoop@server1 ~]$ ssh server2
[hadoop@server1 ~]$ ssh server3
[hadoop@server1 ~]$ vim hadoop/etc/hadoop/slaves
172.25.20.2
172.25.20.3
[hadoop@server1 ~]$ cd /tmp/
[hadoop@server1 tmp]$ ls
hadoop-hadoop Jetty_0_0_0_0_50090_secondary____y6aanv
hsperfdata_hadoop Jetty_localhost_57450_datanode____ycac0k
Jetty_0_0_0_0_50070_hdfs____w2cu08
[hadoop@server1 tmp]$ rm -rf *
[hadoop@server1 ~]$ vim hadoop/etc/hadoop/hdfs-site.xml
<configuration>
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
</configuration>
[hadoop@server1 hadoop]$ vim core-site.xml
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://server1:9000</value>
</property>
</configuration>
[hadoop@server1 hadoop]$ bin/hdfs namenode -format
[hadoop@server1 hadoop]$ sbin/start-dfs.sh
Starting namenodes on [server1]
server1: starting namenode, logging to /home/hadoop/hadoop-2.7.3/logs/hadoop-hadoop-namenode-server1.out
172.25.20.2: starting datanode, logging to /home/hadoop/hadoop-2.7.3/logs/hadoop-hadoop-datanode-server2.out
172.25.20.3: starting datanode, logging to /home/hadoop/hadoop-2.7.3/logs/hadoop-hadoop-datanode-server3.out
Starting secondary namenodes [0.0.0.0]
0.0.0.0: starting secondarynamenode, logging to /home/hadoop/hadoop-2.7.3/logs/hadoop-hadoop-secondarynamenode-server1.out
[hadoop@server1 hadoop]$ jps
3356 Jps
3059 NameNode
3247 SecondaryNameNode