目录
4.将Hadoop和环境配置文件分发到hadoop2和hadoop3中
1.Hadoop安装
我们依照之前规划的集群进行搭建。
不明白的可以去之前的集群架构文章中查看
生成集群其他的虚拟机并进行相关准备_EEEurekaaa!的博客-CSDN博客
这里采用的Hadoop版本是3.1.4,不要选择太新的版本
安装包可以直接去官网下载
首先要安装Hadoop
先把安装包放入software文件夹中
把该文件包解压到servers路径下
tar -xvzf hadoop-3.1.4.tar.gz -C ../servers/
2.Hadoop的配置文件编写
我们需要对这6个文件进行修改
2.1 Hadoop-env.sh
这个文件主要是规定服务器中jdk的位置
以及hdfs和yarn的操作用户
代码如下:
2.2 workers文件
指定集群中工作的服务器名
2.3 core-site.xml文件
核心调度文件
configuration处添加如下代码
<configuration>
<!-- Default file system: the HA nameservice ns1 (defined in hdfs-site.xml) -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://ns1</value>
</property>
<!-- Base directory for Hadoop temporary/working files -->
<property>
<name>hadoop.tmp.dir</name>
<value>/export/data/hadoop/tmp</value>
</property>
<!-- ZooKeeper quorum used for HA coordination (ZKFC); hostnames must match the cluster nodes -->
<property>
<name>ha.zookeeper.quorum</name>
<value>hadoop1:2181,hadoop2:2181,hadoop3:2181</value>
</property>
</configuration>
2.4 hdfs-site.xml 文件
hdfs的详细配置,包括namenode、datanode和journalnode。
代码较长这里直接全部粘贴过来:
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!-- Number of replicas kept for each HDFS block -->
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<!-- Local directory where the NameNode stores its metadata -->
<property>
<name>dfs.namenode.name.dir</name>
<value>/export/data/hadoop/name</value>
</property>
<!-- Local directory where DataNodes store block data -->
<property>
<name>dfs.datanode.data.dir</name>
<value>/export/data/hadoop/data</value>
</property>
<!-- Enable the WebHDFS REST API -->
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<!-- Logical nameservice ID for the HA cluster; matches fs.defaultFS in core-site.xml -->
<property>
<name>dfs.nameservices</name>
<value>ns1</value>
</property>
<!-- Nameservice ns1 has two NameNodes: nn1 and nn2 -->
<property>
<name>dfs.ha.namenodes.ns1</name>
<value>nn1,nn2</value>
</property>
<!-- RPC address of nn1 -->
<property>
<name>dfs.namenode.rpc-address.ns1.nn1</name>
<value>hadoop1:9000</value>
</property>
<!-- HTTP (web UI) address of nn1 -->
<property>
<name>dfs.namenode.http-address.ns1.nn1</name>
<value>hadoop1:50070</value>
</property>
<!-- RPC address of nn2 -->
<property>
<name>dfs.namenode.rpc-address.ns1.nn2</name>
<value>hadoop2:9000</value>
</property>
<!-- HTTP (web UI) address of nn2 -->
<property>
<name>dfs.namenode.http-address.ns1.nn2</name>
<value>hadoop2:50070</value>
</property>
<!-- Shared edit log location: the JournalNode quorum the NameNodes write to -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://hadoop1:8485;hadoop2:8485;hadoop3:8485/ns1</value>
</property>
<!-- Local directory where each JournalNode stores its edit data -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/export/data/hadoop/journaldata</value>
</property>
<!-- Enable automatic NameNode failover (via ZKFC) -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- Proxy provider HDFS clients use to locate the active NameNode -->
<property>
<name>dfs.client.failover.proxy.provider.ns1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Fencing methods, tried in order: sshfence first, shell(/bin/true) as a fallback -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>
sshfence
shell(/bin/true)
</value>
</property>
<!-- Private key for the passwordless SSH that sshfence uses -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<!-- SSH connect timeout for sshfence, in milliseconds -->
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
</configuration>
2.5 yarn-site.xml
直接贴代码
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- Auxiliary service needed so NodeManagers can serve the MapReduce shuffle -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- Disable the physical-memory limit check on containers -->
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>false</value>
</property>
<!-- Disable the virtual-memory limit check on containers -->
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>false</value>
</property>
<!-- Enable ResourceManager HA -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- Cluster id shared by the two HA ResourceManagers -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>yrc</value>
</property>
<!-- Logical ids of the two ResourceManagers -->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!-- Host running rm1 -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>hadoop1</value>
</property>
<!-- Host running rm2 -->
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>hadoop2</value>
</property>
<!-- ZooKeeper quorum used by the ResourceManagers -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>hadoop1:2181,hadoop2:2181,hadoop3:2181</value>
</property>
<!-- Enable RM recovery so in-flight applications survive an RM restart/failover -->
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<!-- Persist RM state in ZooKeeper (the default store is FileSystem-based) -->
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<!-- Web UI address of rm1 under HA -->
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>hadoop1:8088</value>
</property>
<!-- Web UI address of rm2 under HA -->
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>hadoop2:8088</value>
</property>
</configuration>
2.6 mapred-site.xml
<configuration>
<!-- Run MapReduce jobs on YARN -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- Point the MR ApplicationMaster at the Hadoop installation directory -->
<property>
<name>yarn.app.mapreduce.am.env</name>
<value>HADOOP_MAPRED_HOME=/export/servers/hadoop-3.1.4/</value>
</property>
<!-- Same for map tasks -->
<property>
<name>mapreduce.map.env</name>
<value>HADOOP_MAPRED_HOME=/export/servers/hadoop-3.1.4/</value>
</property>
<!-- Same for reduce tasks -->
<property>
<name>mapreduce.reduce.env</name>
<value>HADOOP_MAPRED_HOME=/export/servers/hadoop-3.1.4/</value>
</property>
</configuration>
3.在环境变量中配置Hadoop的环境
vi /etc/profile
#HADOOP环境变量配置
export HADOOP_HOME=/export/servers/hadoop-3.1.4
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin
4.将Hadoop和环境配置文件分发到hadoop2和hadoop3中
scp -r hadoop-3.1.4 hadoop2:/export/servers/
scp -r hadoop-3.1.4 hadoop3:/export/servers/
scp /etc/profile hadoop2:/etc/
scp /etc/profile hadoop3:/etc/
并使Hadoop的环境变量生效
5.HA集群初次启动
先启动zookeeper
启动各节点监控NameNode编辑日志的JournalNode
注意以上两步操作分别要在三台虚拟机上操作,后续journalnode在启动hdfs时会同时启动
在hadoop1上格式化NameNode,并将格式化后的目录复制到hadoop2中
在hadoop1上格式化ZKFC
之后启动hdfs
启动yarn
启动成功
这说明安装是成功的,后续启动时先启动zookeeper之后再启动hdfs和yarn即可