hadoop 完全分布式 hadoop-3.3.6
前置条件: ↓
- hadoop-3.3.6版本及更高版本(运行时)支持jdk-8和jdk-11,建议使用jdk-8
配置环境变量 /etc/profile
export HADOOP_HOME=/opt/module/hadoop
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
hadoop-env.sh 需要配置jdk环境变量和用户权限
export JAVA_HOME=/opt/module/jdk
export HDFS_NAMENODE_USER=root
export HDFS_DATANODE_USER=root
export HDFS_JOURNALNODE_USER=root
export HDFS_SECONDARYNAMENODE_USER=root
yarn-env.sh 需要配置jdk的环境变量和用户权限
export JAVA_HOME=/opt/module/jdk
export YARN_NODEMANAGER_USER=root
export YARN_RESOURCEMANAGER_USER=root
以下为核心配置文件
- core-site.xml
<property>
<name>fs.defaultFS</name>
<value>hdfs://master:9000</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/module/hadoop/datas/tmpdir</value>
</property>
<!--设置 Hadoop Web 服务请求使用的静态用户身份-->
<property>
<name>hadoop.http.staticuser.user</name>
<value>root</value>
</property>
<!--设置 root 用户身份可以代理的主机 * 表示所有-->
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<!--设置 root 用户可以代理的用户组 * 表示所有-->
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
- hdfs-site.xml
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/opt/module/hadoop/datas/namedir</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/opt/module/hadoop/datas/datadir</value>
</property>
<property>
<name>dfs.namenode.http-address</name>
<value>master:50070</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>slave1:50090</value>
</property>
- yarn-site.xml
<!--指定resourcemanager的地址和端口-->
<property>
<name>yarn.resourcemanager.address</name>
<value>master:8032</value>
</property>
<!--指定resourcemanager的web地址和端口-->
<property>
<name>yarn.resourcemanager.webapp.address</name>
<value>master:8088</value>
</property>
<!--指定nodemanager启动时加载server的方式-->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!--日志聚集功能-->
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
</property>
<!--删除聚合日志之前保留聚合日志的时间-->
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>86400</value>
</property>
<!--检查并删除过期日志文件的间隔时间-->
<property>
<name>yarn.log-aggregation.retain-check-interval-seconds</name>
<value>3600</value>
</property>
<!--远程日志聚合目录的根目录(hdfs)-->
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/yarn-logs</value>
</property>
<!--远程日志目录后缀-->
<property>
<name>yarn.nodemanager.remote-app-log-dir-suffix</name>
<value>logs</value>
</property>
<!--yarn日志服务器的url-->
<property>
<name>yarn.log.server.url</name>
<value>http://slave2:19888/jobhistory/logs</value>
</property>
<!--禁用yarn对容器进行虚拟内存限制检查 => 可以避免出现虚拟内存超限导致容器被kill的错误-->
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<!--禁用yarn对容器进行物理内存限制检查 => 可以避免出现物理内存超限导致容器被kill的错误-->
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
- mapred-site.xml
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.address</name>
<value>slave2:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>slave2:19888</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.http.address</name>
<value>slave2:19890</value>
</property>
<property>
<name>mapreduce.jobhistory.intermediate-done-dir</name>
<value>mapred-history/tmpdir</value>
</property>
<property>
<name>mapreduce.jobhistory.done-dir</name>
<value>mapred-history/donedir</value>
</property>
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=/opt/module/hadoop</value>
</property>
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=/opt/module/hadoop</value>
</property>
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=/opt/module/hadoop</value>
</property>
<!--将yarn的类路径(yarn.application.classpath)配置到mapred-site.xml文件里面,可以避免mapreduce运行时出现找不到主类的错误;类路径只能写完整的绝对路径-->
<property>
<name>yarn.application.classpath</name>
<value>
/opt/module/hadoop/etc/hadoop,
/opt/module/hadoop/share/hadoop/common/lib/*,
/opt/module/hadoop/share/hadoop/common/*,
/opt/module/hadoop/share/hadoop/hdfs,
/opt/module/hadoop/share/hadoop/hdfs/lib/*,
/opt/module/hadoop/share/hadoop/hdfs/*,
/opt/module/hadoop/share/hadoop/mapreduce/lib/*,
/opt/module/hadoop/share/hadoop/mapreduce/*,
/opt/module/hadoop/share/hadoop/yarn,
/opt/module/hadoop/share/hadoop/yarn/lib/*,
/opt/module/hadoop/share/hadoop/yarn/*
</value>
</property>
- workers
master
slave1
slave2
执行以下命令步骤启动hadoop
- 格式化namenode:在master节点格式化
hdfs namenode -format
- 启动集群节点hdfs、yarn一起启动:在master节点启动
start-all.sh
- 启动历史(日志)服务器:在slave2节点启动
mapred --daemon start historyserver
- 使用jps验证是否启动成功
master:
- NameNode
- DataNode
- NodeManager
- ResourceManager
slave1:
- DataNode
- NodeManager
- SecondaryNameNode
slave2:
- DataNode
- NodeManager
- JobHistoryServer
1295

被折叠的 条评论
为什么被折叠?



