Download: http://archive.apache.org/dist/hadoop/core/hadoop-3.2.2/
Version: 3.2.2
File name: hadoop-3.2.2.tar.gz
Server information

| IP Address | Hostname |
|---|---|
| 192.168.1.71 | hadoop1 |
| 192.168.1.72 | hadoop2 |
| 192.168.1.73 | hadoop3 |
Note: unless stated otherwise, all commands are run on hadoop1.
1. Extract the archive
tar -zxvf hadoop-3.2.2.tar.gz
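The config files below and the environment variables in step 4 expect a fixed install location; a minimal sketch, assuming /home/sjyy/service (the HADOOP_HOME used in step 4) as that location:

# Hypothetical install path; adjust to match your own layout
mkdir -p /home/sjyy/service
mv hadoop-3.2.2 /home/sjyy/service/
cd /home/sjyy/service/hadoop-3.2.2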
2. Configuration files
Path: etc/hadoop
- core-site.xml
<configuration>
  <!-- Default file system URI (fs.default.name is deprecated in Hadoop 3.x) -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://hadoop1:8020</value>
  </property>
  <!-- Base directory for temporary files -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/export/service/hadoop-3.2.2/data/temp</value>
  </property>
  <!-- I/O buffer size; tune to server capacity in production -->
  <property>
    <name>io.file.buffer.size</name>
    <value>4096</value>
  </property>
  <!-- Enable the HDFS trash so deleted data can be recovered; unit: minutes -->
  <property>
    <name>fs.trash.interval</name>
    <value>10080</value>
  </property>
</configuration>
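Once the files are in place (and distributed in step 3), a quick way to confirm a key is being picked up is hdfs getconf; a brief usage example:

# Should print hdfs://hadoop1:8020
bin/hdfs getconf -confKey fs.defaultFS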
- hdfs-site.xml
<configuration>
  <!-- SecondaryNameNode HTTP address and port -->
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>hadoop1:50090</value>
  </property>
  <!-- NameNode web UI address and port -->
  <property>
    <name>dfs.namenode.http-address</name>
    <value>hadoop1:50070</value>
  </property>
  <!-- Where the NameNode stores its metadata (fsimage) -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:///export/service/hadoop-3.2.2/data/namenode</value>
  </property>
  <!-- Where DataNodes store block data -->
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:///export/service/hadoop-3.2.2/data/datanode</value>
  </property>
  <!-- Where the NameNode stores its edit log (the original used the
       non-existent key dfs.datanode.edits.dir) -->
  <property>
    <name>dfs.namenode.edits.dir</name>
    <value>file:///export/service/hadoop-3.2.2/data/edits</value>
  </property>
  <!-- SecondaryNameNode checkpoint image directory -->
  <property>
    <name>dfs.namenode.checkpoint.dir</name>
    <value>file:///export/service/hadoop-3.2.2/data/checkpoint</value>
  </property>
  <!-- SecondaryNameNode checkpoint edits directory -->
  <property>
    <name>dfs.namenode.checkpoint.edits.dir</name>
    <value>file:///export/service/hadoop-3.2.2/data/cpedits</value>
  </property>
  <!-- Number of replicas per block -->
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>
  <!-- Disable HDFS permission checking (dfs.permissions is the deprecated name) -->
  <property>
    <name>dfs.permissions.enabled</name>
    <value>false</value>
  </property>
  <!-- HDFS block size: 128 MB -->
  <property>
    <name>dfs.blocksize</name>
    <value>134217728</value>
  </property>
</configuration>
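The storage directories referenced above are not all created automatically; pre-creating them on every node (the same config is distributed in step 3) avoids startup permission errors. A minimal sketch:

# Run on hadoop1, hadoop2 and hadoop3
mkdir -p /export/service/hadoop-3.2.2/data/{temp,namenode,datanode,edits,checkpoint,cpedits}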
- hadoop-env.sh
# Set the JDK path
export JAVA_HOME=/home/sjyy/software/jdk
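If the start scripts in step 5 will be run as root, Hadoop 3.x refuses to launch daemons unless the run-as users are declared; a sketch of the extra hadoop-env.sh lines (skip this when using a regular user):

# Only needed when starting the cluster as root
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
export YARN_RESOURCEMANAGER_USER=root
export YARN_NODEMANAGER_USER=root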
- mapred-site.xml
<configuration>
  <!-- Run MapReduce jobs on YARN (without this the default is "local") -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <!-- Enable uber mode so small jobs run in a single JVM -->
  <property>
    <name>mapreduce.job.ubertask.enable</name>
    <value>true</value>
  </property>
  <!-- JobHistory server host and port -->
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>hadoop1:10020</value>
  </property>
  <!-- JobHistory web UI host and port -->
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>hadoop1:19888</value>
  </property>
</configuration>
- yarn-site.xml
<configuration>
  <!-- Host running the ResourceManager -->
  <property>
    <name>yarn.resourcemanager.hostname</name>
    <value>hadoop1</value>
  </property>
  <!-- Auxiliary shuffle service required by MapReduce on YARN -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>
</configuration>
- workers
hadoop1
hadoop2
hadoop3
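Both the scp in step 3 and the start scripts in step 5 log into the hosts listed in workers over SSH, so passwordless SSH from hadoop1 to all three nodes (including itself) is assumed; a minimal sketch:

# Run once on hadoop1 as the Hadoop user
ssh-keygen -t rsa -N '' -f ~/.ssh/id_rsa
ssh-copy-id hadoop1
ssh-copy-id hadoop2
ssh-copy-id hadoop3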
3. Distribute to the other nodes
scp -r hadoop-3.2.2/ hadoop2:$PWD
scp -r hadoop-3.2.2/ hadoop3:$PWD
4. Configure environment variables (run on all three nodes)
vim /etc/profile
export HADOOP_HOME=/home/sjyy/service/hadoop-3.2.2
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
source /etc/profile
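A quick check that the variables took effect:

# Should print "Hadoop 3.2.2"
hadoop version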
5. Start and stop
cd /home/sjyy/service/hadoop-3.2.2
# Format HDFS; run this only once, before the first start, never on later starts
bin/hdfs namenode -format
# Start HDFS and YARN
sbin/start-all.sh
# Stop HDFS and YARN
sbin/stop-all.sh
# Start the JobHistory server (mr-jobhistory-daemon.sh is deprecated in Hadoop 3.x)
bin/mapred --daemon start historyserver
# Stop the JobHistory server
bin/mapred --daemon stop historyserver
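To verify the daemons came up, jps on each node should show roughly the following (assuming the workers file above, where hadoop1 runs both master and worker daemons):

jps
# hadoop1: NameNode, SecondaryNameNode, DataNode, ResourceManager, NodeManager, JobHistoryServer
# hadoop2 / hadoop3: DataNode, NodeManager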
6. Web UIs
- HDFS: http://192.168.1.71:50070 (Hadoop 3 defaults to port 9870, but hdfs-site.xml above pins 50070)
- YARN: http://192.168.1.71:8088
- JobHistory: http://192.168.1.71:19888
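As a final smoke test, write a file into HDFS and run the bundled pi example on YARN (the examples jar ships under share/hadoop/mapreduce in the distribution):

hdfs dfs -mkdir -p /tmp/smoke
hdfs dfs -put etc/hadoop/core-site.xml /tmp/smoke/
hdfs dfs -ls /tmp/smoke
hadoop jar share/hadoop/mapreduce/hadoop-mapreduce-examples-3.2.2.jar pi 2 10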