1.hadoop概念
Hadoop是一个由Apache基金会所开发的分布式系统基础架构。用户可以在不了解分布式底层细节的情况下,开发分布式程序。充分利用集群的威力进行高速运算和存储。
Hadoop实现了一个分布式文件系统(Hadoop Distributed File System),简称HDFS。HDFS有高容错性的特点,并且设计用来部署在低廉的(low-cost)硬件上;而且它提供高吞吐量(high throughput)来访问应用程序的数据,适合那些有着超大数据集(large data set)的应用程序。HDFS放宽了(relax)POSIX的要求,可以以流的形式访问(streaming access)文件系统中的数据。
Hadoop的框架最核心的设计就是:HDFS和MapReduce。HDFS为海量的数据提供了存储,则MapReduce为海量的数据提供了计算。
2.部署hadoop
环境:redhat7.2 vm6.5
hostname 虚拟机ip
server1 172.25.43.1 master
server2 172.25.43.2 slave
server3 172.25.43.3 slave
server4 172.25.43.4 slave
server5 172.25.43.5 slave
注:节点个数必须是奇数
[root@server1 ~]# useradd -u 1000 hadoop ##所有的节点的hadoop用户信息必须一致
[root@server1 ~]# passwd hadoop
[root@server1 ~]# su - hadoop
[hadoop@server1 ~]$ ls
hadoop-2.7.3.tar.gz jdk-7u79-linux-x64.tar.gz ##需要的包
[hadoop@server1 ~]$ tar zxf hadoop-2.7.3.tar.gz ##解压
[hadoop@server1 ~]$ tar zxf jdk-7u79-linux-x64.tar.gz
[hadoop@server1 ~]$ ln -s hadoop-2.7.3/ hadoop
[hadoop@server1 ~]$ ln -s jdk1.7.0_79/ java
[root@server1 ~]# vim /etc/profile ##配置环境变量
vim:
export JAVA_HOME=/home/hadoop/java
export CLASSPATH=:$JAVA_HOME/lib:$JAVA_HOME/jre/lib
export PATH=$PATH:$JAVA_HOME/bin
:wq
[root@server1 ~]# source /etc/profile
[root@server1 ~]# echo $PATH
[hadoop@server1 ~]$ which jps
~/java/bin/jps
[hadoop@server1 ~]$ jps
1167 Jps
[hadoop@server1 ~]$ vim hadoop/etc/hadoop/hadoop-env.sh
vim:
export JAVA_HOME=/home/hadoop/java ##修改为/home/hadoop/java
:wq
[hadoop@server1 ~]$ hadoop/bin/hadoop
[hadoop@server1 ~]$ cd hadoop/
[hadoop@server1 hadoop]$ mkdir input
[hadoop@server1 hadoop]$ cp etc/hadoop/* input/
[hadoop@server1 hadoop]$ bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar grep input output 'dfs[a-z.]+'
[hadoop@server1 hadoop]$ vim etc/hadoop/core-site.xml
vim:
<configuration>
<property>
<name>fs.default.name</name>
<value>hdfs://172.25.43.1:9000</value> ##本机ip
</property>
</configuration>
:wq
[hadoop@server1 hadoop]$ vim etc/hadoop/hdfs-site.xml
vim:
<configuration>
<property>
<name>dfs.replication</name>
<value>2</value> ##文件保存2份
</property>
</configuration>
:wq
[hadoop@server1 hadoop]$ vim etc/hadoop/slaves
vim:
172.25.43.1
:wq
[hadoop@server1 hadoop]$ ssh-keygen
[hadoop@server1 hadoop]$ ssh-copy-id 172.25.43.1
测试:
[hadoop@server1 hadoop]$ ssh 172.25.43.1
[hadoop@server1 hadoop]$ logout
[hadoop@server1 hadoop]$ bin/hdfs namenode -format
[hadoop@server1 hadoop]$ sbin/start-dfs.sh
[hadoop@server1 hadoop]$ jps
[hadoop@server1 hadoop]$ ps ax
[hadoop@server1 hadoop]$ bin/hdfs dfs -mkdir /user
[hadoop@server1 hadoop]$ bin/hdfs dfs -mkdir /user/hadoop
[hadoop@server1 hadoop]$ bin/hdfs dfs -ls
[hadoop@server1 hadoop]$ bin/hdfs dfs -put etc/hadoop/ test
[hadoop@server1 hadoop]$ bin/hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-2.7.3.jar grep test output 'dfs[a-z.]+'
[hadoop@server1 hadoop]$ rm -rf output/
[hadoop@server1 hadoop]$ bin/hdfs dfs -get output
[hadoop@server1 hadoop]$ cp etc/hadoop/mapred-site.xml.template etc/hadoop/mapred-site.xml
[hadoop@server1 hadoop]$ vim etc/hadoop/mapred-site.xml
vim:
<configuration>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
</configuration>
:wq
[hadoop@server1 hadoop]$ vim etc/hadoop/yarn-site.xml
vim:
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
:wq
[hadoop@server1 hadoop]$ sbin/stop-yarn.sh
[root@server1 ~]# yum install -y nfs-utils
[root@server1 ~]# /etc/init.d/rpcbind start
[root@server1 ~]# vim /etc/exports
vim:
/home/hadoop 172.25.43.0/255.255.255.0(rw,anonuid=1000,anongid=1000)
:wq
[root@server1 ~]# /etc/init.d/nfs start
[root@server1 ~]# showmount -e
[hadoop@server1 hadoop]$ sbin/start-dfs.sh
[hadoop@server1 hadoop]$ cat /etc/hosts
[root@server2 ~]# yum install -y nfs-utils
[root@server2 ~]# /etc/init.d/rpcbind start
[root@server2 ~]# useradd -u 1000 hadoop
[root@server2 ~]# mount 172.25.43.1:/home/hadoop /home/hadoop/
[root@server2 ~]# ll -d /home/hadoop
[hadoop@server2 hadoop]$ vim etc/hadoop/slaves
vim:
172.25.43.2
172.25.43.3
:wq
[hadoop@server2 hadoop]$ sbin/hadoop-daemon.sh start datanode
[hadoop@server2 hadoop]$ jps
server3和server2一样