1. 安装jdk
2. 上传并解压hadoop
3. 配置免密登录
如果是集群模式,localhost替换为其他服务器的ip地址或主机名,所有的服务器之间均需要配置免密登录
$ ssh-keygen
$ ssh-copy-id root@localhost
4. 配置${hadoop_home}/etc/hadoop/core-site.xml
<configuration>
<property>
<name>hadoop.tmp.dir</name>
<!--临时数据存放地址-->
<value>file:/usr/local/soft/hadoop/tmp</value>
<description>A base for other temporary directories.</description>
</property>
<property>
<!--namenode地址-->
<name>fs.defaultFS</name>
<value>hdfs://localhost:9000</value>
</property>
<property>
<!--secondNameNode数据归档时间-->
<name>fs.checkpoint.period</name>
<value>3600</value>
<description>The number of seconds between two periodic checkpoints.
</description>
</property>
<!--控制edit log的大小,当edit log的大小超过这个限制了就进行checkpoint-->
<property>
<name>fs.checkpoint.size</name>
<value>67108864</value>
</property>
</configuration>
5. 配置${hadoop_home}/etc/hadoop/hdfs-site.xml
<configuration>
<property>
<!--数据副本数量-->
<name>dfs.replication</name>
<value>1</value>
</property>
<property>
<!--namenode元数据存放地址-->
<name>dfs.namenode.name.dir</name>
<value>file:/usr/local/soft/hadoop/tmp/dfs/name</value>
</property>
<property>
<!--datanode数据文件存放地址-->
<name>dfs.datanode.data.dir</name>
<value>file:/usr/local/soft/hadoop/tmp/dfs/data</value>
</property>
<!--secondNameNode地址-->
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>data2.ccsp.yota:50090</value>
</property>
</configuration>
6. 配置${hadoop_home}/etc/hadoop/mapred-site.xml
<configuration>
<property>
<!--指定mapreduce读取目录可以使用目录读取-->
<name>mapreduce.input.fileinputformat.input.dir.recursive</name>
<value>true</value>
</property>
<property>
<!--指定mapreduce运行在yarn上-->
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<!--jobtracker交互地址-->
<name>mapred.job.tracker</name>
<value>bigdata1.ccsp.jv:9001</value>
</property>
<property>
<!--log查询页面地址-->
<name>yarn.log.server.url</name>
<value>http://172.16.7.30:19888/jobhistory/logs/</value>
</property>
</configuration>
7. 配置${hadoop_home}/etc/hadoop/yarn-site.xml
<configuration>
<!-- Site specific YARN configuration properties -->
<property>
<!--设置log记录到hdfs中-->
<name>yarn.log-aggregation-enable</name>
<value>true</value>
<description></description>
</property>
<property>
<!--指定yarn的老大resourcemanager的地址-->
<name>yarn.resourcemanager.hostname</name>
<value>bigdata1.ccsp.jv</value>
</property>
<property>
<!--NodeManager获取数据的方式-->
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
8. 配置${hadoop_home}/etc/hadoop/slaves
datanode的地址列表,每行一个
vim slaves
9. 格式化namenode
$ ./bin/hdfs namenode -format
10. 启动hadoop
最后一行是启动日志记录
$ ./sbin/start-dfs.sh
$ ./sbin/start-yarn.sh
$ ./sbin/mr-jobhistory-daemon.sh start historyserver