| | master | slave1 | slave2 |
|---|---|---|---|
| HDFS | NameNode<br>DataNode | SecondaryNameNode<br>DataNode | DataNode |
| YARN | ResourceManager<br>NodeManager | NodeManager | NodeManager |
注意:本实验将按照上面的三节点角色表来进行部署。
由于笔者时间关系,暂时只放出配置文件,后续将继续更新 Hadoop 的安装以及操作步骤,总之就是学习记录。
<!-- mapred-site.xml -->
<!-- Put site-specific property overrides in this file. -->
<configuration>

  <!-- Run MapReduce jobs on the YARN framework (instead of local/classic). -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>

  <!-- RPC address of the MapReduce JobHistory server. -->
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>master:10020</value>
  </property>

  <!-- Web UI address of the MapReduce JobHistory server. -->
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>master:19888</value>
  </property>

</configuration>
<!-- yarn-site.xml -->
<configuration>
  <!-- Site specific YARN configuration properties -->

  <!-- Auxiliary service run by every NodeManager. Must be set to
       mapreduce_shuffle or MapReduce jobs cannot run. -->
  <property>
    <name>yarn.nodemanager.aux-services</name>
    <value>mapreduce_shuffle</value>
  </property>

  <!-- Address the ResourceManager exposes to clients; used to submit
       and kill applications. -->
  <property>
    <name>yarn.resourcemanager.address</name>
    <value>master:18040</value>
  </property>

  <!-- Address the ResourceManager exposes to ApplicationMasters; used
       to request and release resources. -->
  <property>
    <name>yarn.resourcemanager.scheduler.address</name>
    <value>master:18030</value>
  </property>

  <!-- Address the ResourceManager exposes to NodeManagers; used for
       heartbeats and task assignment. -->
  <property>
    <name>yarn.resourcemanager.resource-tracker.address</name>
    <value>master:18025</value>
  </property>

  <!-- Address the ResourceManager exposes to administrators for admin
       commands. Default: ${yarn.resourcemanager.hostname}:8033 -->
  <property>
    <name>yarn.resourcemanager.admin.address</name>
    <value>master:18141</value>
  </property>

  <!-- ResourceManager web UI address; browse cluster state here. -->
  <property>
    <name>yarn.resourcemanager.webapp.address</name>
    <value>master:18088</value>
  </property>

  <!-- FIXED: comment opener was the malformed "<--!", which breaks XML
       well-formedness; it must be "<!--". -->
  <!-- Enable log aggregation so container logs show up in the history
       server. -->
  <property>
    <description>Whether to enable log aggregation. Log aggregation collects
    each container's logs and moves these logs onto a file-system, for e.g.
    HDFS, after the application completes. Users can configure the
    "yarn.nodemanager.remote-app-log-dir" and
    "yarn.nodemanager.remote-app-log-dir-suffix" properties to determine
    where these logs are moved to. Users can access the logs via the
    Application Timeline Server.
    </description>
    <name>yarn.log-aggregation-enable</name>
    <value>true</value>
  </property>

  <!-- Keep aggregated logs for 7 days (604800 seconds).
       FIXED: the original used yarn.log-aggregation.retain-check-interval-seconds
       (the interval between retention *checks*), which does not set the
       retention time; the stated intent of "retain for 7 days" requires
       yarn.log-aggregation.retain-seconds. -->
  <property>
    <description>How long to keep aggregation logs before deleting them,
    in seconds. -1 disables. Be careful, set this too small and you will
    spam the name node.</description>
    <name>yarn.log-aggregation.retain-seconds</name>
    <value>604800</value>
  </property>

</configuration>
<!-- hdfs-site.xml -->
<configuration>

  <!-- Number of replicas per HDFS block. 3 for this fully-distributed
       three-node cluster; use 1 for pseudo-distributed mode. -->
  <property>
    <name>dfs.replication</name>
    <value>3</value>
  </property>

  <!-- Where the NameNode stores its metadata (fsimage files). -->
  <property>
    <name>dfs.namenode.name.dir</name>
    <value>file:/home/whzy/hadoopdata/dfs/name</value>
  </property>

  <!-- Where each DataNode stores HDFS block data on local disk.
       FIXED: the original property name was "dfs.namenode.data.dir",
       which is not a valid HDFS property; the DataNode storage path is
       configured with "dfs.datanode.data.dir" (the original comment
       itself described DataNode block storage). -->
  <property>
    <name>dfs.datanode.data.dir</name>
    <value>file:/home/whzy/hadoopdata/dfs/data</value>
  </property>

  <!-- Disable HDFS permission checking (default true) to simplify the
       lab setup. NOTE(review): on Hadoop 2.x+ the canonical name is
       "dfs.permissions.enabled"; "dfs.permissions" is the legacy alias —
       confirm against the Hadoop version in use. Do not disable this in
       production. -->
  <property>
    <name>dfs.permissions</name>
    <value>false</value>
  </property>

  <!-- Run the SecondaryNameNode on slave1, per the deployment table. -->
  <property>
    <name>dfs.namenode.secondary.http-address</name>
    <value>slave1:50090</value>
    <description>
    The secondary namenode http server address and port.
    </description>
  </property>

</configuration>
slaves
这个文件用于配置所有的 slave 节点(即运行 DataNode / NodeManager 的主机),每行写一个主机名:
master
slave1
slave2