First, the setup process for single-machine mode:
(1) Download Hadoop 0.19.1, extract it under /home/admin, and rename the extracted directory to hadoop (so the install lives at /home/admin/hadoop):
[root@localhost admin]# wget http://archive.apache.org/dist/hadoop/core/hadoop-0.19.1/hadoop-0.19.1.tar.gz
[root@localhost admin]# tar -xzvf hadoop-0.19.1.tar.gz
[root@localhost admin]# mv hadoop-0.19.1 hadoop
(2) Add the following lines to the global configuration file /etc/profile:
export HADOOP_HOME=/home/admin/hadoop
export PATH=$HADOOP_HOME/bin:$PATH
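These variables take effect in new login shells; to apply them to the current shell, reload the profile and check the result:
[root@localhost admin]# source /etc/profile
[root@localhost admin]# echo $HADOOP_HOME
/home/admin/hadoop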
(3) Edit the /home/admin/hadoop/conf/hadoop-env.sh file and add the following:
export JAVA_HOME=/opt/jdk1.6.0_17
export HADOOP_HEAPSIZE=200
Here /opt/jdk1.6.0_17 is the path of your JDK installation; adjust it to match your machine. HADOOP_HEAPSIZE is the maximum heap size, in MB, given to each Hadoop daemon.
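A quick way to confirm both paths are correct (after sourcing /etc/profile from step 2) is to check the JDK and Hadoop versions:
[root@localhost admin]# /opt/jdk1.6.0_17/bin/java -version
[root@localhost admin]# hadoop version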
(4) Edit the /home/admin/hadoop/conf/hadoop-site.xml file; the simplest approach is to replace the whole file with the following:
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
  <property>
    <name>fs.default.name</name>
    <value>hdfs://localhost:9000</value>
  </property>
  <property>
    <name>mapred.job.tracker</name>
    <value>localhost:9001</value>
  </property>
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/home/admin/tmp</value>
  </property>
  <property>
    <name>dfs.name.dir</name>
    <value>/home/admin/filesystem/name</value>
  </property>
  <property>
    <name>dfs.data.dir</name>
    <value>/home/admin/filesystem/data</value>
  </property>
  <property>
    <name>dfs.replication</name>
    <value>1</value>
  </property>
</configuration>
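In brief: fs.default.name and mapred.job.tracker tell clients where to reach the NameNode and JobTracker; hadoop.tmp.dir, dfs.name.dir, and dfs.data.dir are local paths for temporary data, filesystem metadata, and block storage; dfs.replication is 1 because a single machine has only one DataNode. Hadoop creates these directories when it first needs them, but pre-creating them catches permission problems early:
[root@localhost admin]# mkdir -p /home/admin/tmp /home/admin/filesystem/name /home/admin/filesystem/data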
(5) Set up passwordless SSH login. First, generate a DSA key pair (public and private keys) on the NameNode:
[root@localhost admin]# ssh-keygen -t dsa -P '' -f ~/.ssh/id_dsa
Then append the public key to the authorized keys file:
[root@localhost admin]# cat ~/.ssh/id_dsa.pub >> ~/.ssh/authorized_keys
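Many sshd configurations (StrictModes) require tight permissions on these files; set them, then verify that logging in no longer prompts for a password:
[root@localhost admin]# chmod 700 ~/.ssh && chmod 600 ~/.ssh/authorized_keys
[root@localhost admin]# ssh localhost 'echo passwordless SSH OK'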
(6) Format the HDFS filesystem:
[root@localhost admin]# cd $HADOOP_HOME
[root@localhost hadoop]# bin/hadoop namenode -format
(7) Start Hadoop:
[root@localhost admin]# cd $HADOOP_HOME
[root@localhost hadoop]# bin/start-all.sh
You can run the jps command to check that all the daemon processes have started:
[root@localhost hive]# /opt/jdk1.6.0_17/bin/jps
18890 TaskTracker
19083 Jps
18692 SecondaryNameNode
18763 JobTracker
18445 NameNode
18576 DataNode
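You can also check the built-in web interfaces; in this Hadoop release the NameNode UI defaults to port 50070 and the JobTracker UI to port 50030:
[root@localhost hive]# curl -s http://localhost:50070/ | head
[root@localhost hive]# curl -s http://localhost:50030/ | head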
(8) Use Hive:
[root@localhost conf]# export HADOOP=/home/admin/hadoop
[root@localhost conf]# export HIVE_HOME=/home/admin/hadoop/contrib/hive/
[root@localhost conf]# cd $HIVE_HOME
[root@localhost hive]# bin/hive
hive> CREATE TABLE yuanchuo(id INT,name STRING);
OK
Time taken: 4.028 seconds
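To confirm the table exists, you can list tables and inspect the schema from the same session (SHOW TABLES and DESCRIBE are standard Hive statements):
hive> SHOW TABLES;
hive> DESCRIBE yuanchuo;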
Next, the setup process for a Linux cluster with one NameNode and two DataNodes.
(1) Installing Hadoop on the two DataNode machines follows essentially the same steps as single-machine mode; after installing, there is no need to start Hadoop on them yet. Note that fs.default.name and mapred.job.tracker in hadoop-site.xml must point at the NameNode's address on every machine, not at localhost.
(2) Copy the public key file id_dsa.pub generated on the NameNode to each DataNode and append its contents to the authorized keys file there.
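A minimal way to do this from the NameNode, using the two DataNode addresses that appear later in this walkthrough:
[root@localhost admin]# scp ~/.ssh/id_dsa.pub root@10.249.170.128:/tmp/
[root@localhost admin]# ssh root@10.249.170.128 'mkdir -p ~/.ssh && cat /tmp/id_dsa.pub >> ~/.ssh/authorized_keys'
Repeat the same two commands for 10.249.168.129.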
(3) On the NameNode, edit the slaves file in the /home/admin/hadoop/conf/ directory and list the IP addresses of the two DataNodes, one per line.
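With the addresses used in this walkthrough, the slaves file looks like this:
[root@localhost conf]# cat slaves
10.249.170.128
10.249.168.129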
(4) Start Hadoop on the NameNode:
[root@localhost admin]# cd $HADOOP_HOME
[root@localhost hadoop]# bin/start-all.sh
starting namenode, logging to /home/admin/hadoop/bin/../logs/hadoop-root-namenode-localhost.localdomain.out
10.249.170.128: starting datanode, logging to /home/admin/hadoop/bin/../logs/hadoop-root-datanode-localhost.localdomain.out
10.249.168.129: starting datanode, logging to /home/admin/hadoop/bin/../logs/hadoop-root-datanode-localhost.localdomain.out
localhost: starting secondarynamenode, logging to /home/admin/hadoop/bin/../logs/hadoop-root-secondarynamenode-localhost.localdomain.out
starting jobtracker, logging to /home/admin/hadoop/bin/../logs/hadoop-root-jobtracker-localhost.localdomain.out
10.249.168.129: starting tasktracker, logging to /home/admin/hadoop/bin/../logs/hadoop-root-tasktracker-localhost.localdomain.out
10.249.170.128: starting tasktracker, logging to /home/admin/hadoop/bin/../logs/hadoop-root-tasktracker-localhost.localdomain.out
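Once startup finishes, you can confirm that both DataNodes have registered with the NameNode; dfsadmin -report lists every live DataNode along with its capacity and usage:
[root@localhost hadoop]# hadoop dfsadmin -report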
Finally, we run one of the example jobs shipped with Hadoop to test that everything works:
[root@localhost tmp]# echo "yuanchuo doudou xiaobao xiaobao" > tmp01
[root@localhost tmp]# echo "doudou xiaobao xiaobao" > tmp02
[root@localhost tmp]# hadoop fs -mkdir input
[root@localhost tmp]# hadoop fs -copyFromLocal /home/admin/tmp/tmp0* input
[root@localhost tmp]# cd $HADOOP_HOME
[root@localhost hadoop]# hadoop jar hadoop-0.19.1-examples.jar wordcount input output
10/05/29 10:12:22 INFO mapred.FileInputFormat: Total input paths to process : 2
10/05/29 10:12:22 INFO mapred.JobClient: Running job: job_201005291000_0001
10/05/29 10:12:23 INFO mapred.JobClient: map 0% reduce 0%
10/05/29 10:12:24 INFO mapred.JobClient: map 33% reduce 0%
10/05/29 10:12:25 INFO mapred.JobClient: map 66% reduce 0%
10/05/29 10:12:26 INFO mapred.JobClient: map 100% reduce 0%
10/05/29 10:12:32 INFO mapred.JobClient: map 100% reduce 100%
10/05/29 10:12:33 INFO mapred.JobClient: Job complete: job_201005291000_0001
10/05/29 10:12:33 INFO mapred.JobClient: Counters: 16
10/05/29 10:12:33 INFO mapred.JobClient: File Systems
10/05/29 10:12:33 INFO mapred.JobClient: HDFS bytes read=61
10/05/29 10:12:33 INFO mapred.JobClient: HDFS bytes written=30
10/05/29 10:12:33 INFO mapred.JobClient: Local bytes read=75
10/05/29 10:12:33 INFO mapred.JobClient: Local bytes written=246
10/05/29 10:12:33 INFO mapred.JobClient: Job Counters
10/05/29 10:12:33 INFO mapred.JobClient: Launched reduce tasks=1
10/05/29 10:12:33 INFO mapred.JobClient: Launched map tasks=3
10/05/29 10:12:33 INFO mapred.JobClient: Data-local map tasks=3
10/05/29 10:12:33 INFO mapred.JobClient: Map-Reduce Framework
10/05/29 10:12:33 INFO mapred.JobClient: Reduce input groups=3
10/05/29 10:12:33 INFO mapred.JobClient: Combine output records=5
10/05/29 10:12:33 INFO mapred.JobClient: Map input records=2
10/05/29 10:12:33 INFO mapred.JobClient: Reduce output records=3
10/05/29 10:12:33 INFO mapred.JobClient: Map output bytes=83
10/05/29 10:12:33 INFO mapred.JobClient: Map input bytes=55
10/05/29 10:12:33 INFO mapred.JobClient: Combine input records=7
10/05/29 10:12:33 INFO mapred.JobClient: Map output records=7
10/05/29 10:12:33 INFO mapred.JobClient: Reduce input records=5
[root@localhost hadoop]# hadoop fs -ls
Found 2 items
drwxr-xr-x - root supergroup 0 2010-05-29 10:10 /user/root/input
drwxr-xr-x - root supergroup 0 2010-05-29 10:12 /user/root/output
[root@localhost hadoop]# hadoop fs -cat /user/root/output/part-00000
doudou 2
xiaobao 4
yuanchuo 1
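Note that MapReduce refuses to write into an existing output directory, so to rerun the example you must remove it first (in this Hadoop version the recursive remove is -rmr):
[root@localhost hadoop]# hadoop fs -rmr output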