hadoop自动故障转移集群搭建详解

安装包地址

https://archive.apache.org/dist/hadoop/common/stable/
选择hadoop-3.3.2.tar.gz包下载

环境

操作系统

[root@localhost ~]# cat /etc/redhat-release 
CentOS Linux release 7.5.1804 (Core) 
[root@localhost ~]# uname -a
Linux localhost.localdomain 3.10.0-862.el7.x86_64 #1 SMP Fri Apr 20 16:44:24 UTC 2018 x86_64 x86_64 x86_64 GNU/Linux

ip和主机名规划

192.168.103.232 d-01
192.168.103.235 d-02
192.168.103.237 d-03

搭建前准备

修改主机名

3个主机分别操作

[root@localhost ~]# hostnamectl set-hostname d-01
[root@localhost ~]# hostnamectl set-hostname d-02
[root@localhost ~]# hostnamectl set-hostname d-03

配置主机名解析

每个主机都操作

echo -e "192.168.103.232 d-01\n192.168.103.235 d-02\n192.168.103.237 d-03" >> /etc/hosts

搭建hadoop单点集群

安装包准备

下载安装包到d-01服务器

wget https://archive.apache.org/dist/hadoop/common/stable/hadoop-3.3.2.tar.gz

解压安装包
只在d-01操作

[root@d-01 ~]# tar zxvf hadoop-3.3.2.tar.gz -C /opt/

安装java

yum -y install java java-devel

修改配置文件

只在d-01修改

env文件

[root@d-01 hadoop]# egrep -v "^#|^$" /opt/hadoop-3.3.2/etc/hadoop/hadoop-env.sh 
# JDK location: the yum-installed OpenJDK 8, resolved via /etc/alternatives (see below).
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.322.b06-1.el7_9.x86_64/
# Hadoop installation root.
export HADOOP_HOME=/opt/hadoop-3.3.2
# Fixed 512m JVM heap for Hadoop daemons (min == max avoids heap resizing).
export HADOOP_HEAPSIZE_MAX=512m
export HADOOP_HEAPSIZE_MIN=512m
# OS type autodetection, overridable from the environment.
export HADOOP_OS_TYPE=${HADOOP_OS_TYPE:-$(uname -s)}
# OS users the start/stop scripts use to run each daemon.
export HDFS_NAMENODE_USER=hdfs
export HDFS_DATANODE_USER=hdfs
export HDFS_SECONDARYNAMENODE_USER=hdfs
export YARN_RESOURCEMANAGER_USER=yarn
export YARN_NODEMANAGER_USER=yarn

核心文件

[root@d-01 hadoop]# egrep -v "^#|^$" /opt/hadoop-3.3.2/etc/hadoop/core-site.xml 
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
    <!-- Default filesystem URI. This is the single-NameNode stage; once HA is
         enabled later in this guide it becomes hdfs://mycluster. -->
    <property>
        <name>fs.defaultFS</name>
        <!--value>hdfs://mycluster</value-->
        <value>hdfs://d-01:9000</value>
    </property>
    <!-- Read/write buffer size in bytes (128 KiB). -->
    <property>
        <name>io.file.buffer.size</name>
        <value>131072</value>
    </property>
    <!-- Static user for the HDFS web UI, so file operations from the web
         interface are not rejected with a permission error. -->
    <property>
        <name>hadoop.http.staticuser.user</name>
        <value>hdfs</value>
    </property>
    <!-- Left disabled: HDFS permission checks stay on.
    <property>
        <name>dfs.permissions.enabled</name>
        <value>false</value>
    </property>
    -->
    <!-- Allow the hdfs user to impersonate any user from any host. -->
    <property>
        <name>hadoop.proxyuser.hdfs.hosts</name>
        <value>*</value>
    </property>
    <property>
        <name>hadoop.proxyuser.hdfs.groups</name>
        <value>*</value>
    </property>
</configuration>

hdfs文件

[root@d-01 hadoop]# egrep -v "^#|^$" /opt/hadoop-3.3.2/etc/hadoop/hdfs-site.xml 
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
    <!-- NOTE: the original file spelled several tags <proprety>. Hadoop's
         Configuration loader only reads <property> elements and silently
         ignores anything else, so those entries had no effect. All tags
         below are corrected. -->
    <!-- Local directories for NameNode metadata (fsimage/edits); the comma
         list keeps a redundant copy in each directory. -->
    <property>
        <name>dfs.namenode.name.dir</name>
        <value>file:/opt/hadoop-3.3.2/namenode,file:/opt/hadoop-3.3.2/namenodebak</value>
    </property>
    <!-- Optional allow-list of DataNode hosts; deliberately disabled.
    <property>
        <name>dfs.hosts</name>
        <value>d-01,d-02,d-03</value>
    </property>
    -->
    <!-- HDFS block size: 256 MiB. -->
    <property>
        <name>dfs.blocksize</name>
        <value>268435456</value>
    </property>
    <!-- NameNode RPC handler thread count. -->
    <property>
        <name>dfs.namenode.handler.count</name>
        <value>10</value>
    </property>
    <!-- Local directories for DataNode block storage. -->
    <property>
        <name>dfs.datanode.data.dir</name>
        <value>file:/opt/hadoop-3.3.2/datanode,file:/opt/hadoop-3.3.2/datanodebak</value>
    </property>
    <property>
        <!-- documented key is dfs.namenode.http-address (dash, not dot);
             the original dfs.namenode.http.address is not recognized -->
        <name>dfs.namenode.http-address</name>
        <value>d-01:9870</value>
    </property>
    <property>
        <name>dfs.webhdfs.enabled</name>
        <value>true</value>
    </property>
    <!-- NameNode HA (automatic failover) settings. Deliberately commented
         out for the single-node stage of this guide; when enabling HA,
         uncomment this section and switch fs.defaultFS in core-site.xml
         to hdfs://mycluster.
    <property>
        <name>dfs.nameservices</name>
        <value>mycluster</value>
    </property>
    <property>
        <name>dfs.ha.namenodes.mycluster</name>
        <value>nn1,nn2,nn3</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.mycluster.nn1</name>
        <value>d-01:8020</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.mycluster.nn2</name>
        <value>d-02:8020</value>
    </property>
    <property>
        <name>dfs.namenode.rpc-address.mycluster.nn3</name>
        <value>d-03:8020</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.mycluster.nn1</name>
        <value>d-01:9870</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.mycluster.nn2</name>
        <value>d-02:9870</value>
    </property>
    <property>
        <name>dfs.namenode.http-address.mycluster.nn3</name>
        <value>d-03:9870</value>
    </property>
    <property>
        <name>dfs.namenode.shared.edits.dir</name>
        <value>qjournal://d-01:8485;d-02:8485;d-03:8485/mycluster</value>
    </property>
    <property>
        <name>dfs.client.failover.proxy.provider.mycluster</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>
    <property>
        <name>dfs.ha.fencing.methods</name>
        <value>sshfence</value>
    </property>
    <property>
        <name>dfs.ha.fencing.ssh.connect-timeout</name>
        <value>30000</value>
    </property>
    <property>
        <name>dfs.ha.fencing.ssh.private-key-files</name>
        <value>/home/hdfs/.ssh/id_rsa</value>
    </property>
    <property>
        <name>dfs.journalnode.edits.dir</name>
        <value>/opt/hadoop-3.3.2/journal/data</value>
    </property>
    -->
</configuration>

yarn文件

[root@d-01 hadoop]# egrep -v "^#|^$" /opt/hadoop-3.3.2/etc/hadoop/yarn-site.xml 
<?xml version="1.0"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- Site specific YARN configuration properties -->
    <!-- NOTE: the original file spelled almost every tag <proprety>. Hadoop
         only reads <property> elements and silently ignores anything else,
         so those settings never took effect. All tags below are corrected. -->
    <property>
        <name>yarn.acl.enable</name>
        <value>false</value>
    </property>
    <property>
        <name>yarn.admin.acl</name>
        <value>*</value>
    </property>
    <property>
        <name>yarn.log-aggregation-enable</name>
        <value>false</value>
    </property>

    <!-- Configurations for ResourceManager -->
    <property>
        <name>yarn.resourcemanager.address</name>
        <value>d-01:8032</value>
    </property>
    <property>
        <name>yarn.resourcemanager.scheduler.address</name>
        <value>d-01:8030</value>
    </property>
    <property>
        <name>yarn.resourcemanager.resource-tracker.address</name>
        <value>d-01:8031</value>
    </property>
    <property>
        <name>yarn.resourcemanager.admin.address</name>
        <value>d-01:8033</value>
    </property>
    <property>
        <!-- RM web UI; the conventional port is 8088 (8042 is the
             NodeManager web port and would clash on a combined node) -->
        <name>yarn.resourcemanager.webapp.address</name>
        <value>d-01:8088</value>
    </property>
    <property>
        <!-- must be a scheduler class name, not a host:port as in the
             original (which would fail RM startup) -->
        <name>yarn.resourcemanager.scheduler.class</name>
        <value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
    </property>
    <property>
        <!-- *-mb properties take a plain number of MiB; the original "128m"
             suffix is not parseable -->
        <name>yarn.scheduler.minimum-allocation-mb</name>
        <value>128</value>
    </property>
    <property>
        <name>yarn.scheduler.maximum-allocation-mb</name>
        <value>128</value>
    </property>
    <property>
        <!-- NOTE(review): this should be a file listing the allowed node
             hosts, not a bare directory; confirm the path -->
        <name>yarn.resourcemanager.nodes.include-path</name>
        <value>/opt/hadoop-3.3.2/resource</value>
    </property>

    <!-- Configurations for NodeManager -->
    <property>
        <!-- total memory (MiB) this NodeManager offers to containers;
             128 is only enough for toy jobs, tune per host RAM -->
        <name>yarn.nodemanager.resource.memory-mb</name>
        <value>128</value>
    </property>
    <property>
        <name>yarn.nodemanager.vmem-pmem-ratio</name>
        <value>2</value>
    </property>
    <property>
        <name>yarn.nodemanager.local-dirs</name>
        <value>/opt/hadoop-3.3.2/nodemanager01,/opt/hadoop-3.3.2/nodemanager02</value>
    </property>
    <property>
        <name>yarn.nodemanager.log-dirs</name>
        <value>/opt/hadoop-3.3.2/logs/nodemanager</value>
    </property>
    <property>
        <name>yarn.nodemanager.log.retain-seconds</name>
        <value>10800</value>
    </property>
    <!-- Only applicable if log aggregation is enabled.
    <property>
        <name>yarn.nodemanager.remote-app-log-dir</name>
        <value>/logs</value>
    </property>
    <property>
        <name>yarn.nodemanager.remote-app-log-dir-suffix</name>
        <value>logs</value>
    </property>
    -->
    <property>
        <name>yarn.nodemanager.aux-services</name>
        <value>mapreduce_shuffle</value>
    </property>
    <!-- Environment variables inherited by containers from NodeManagers; for
         MapreReduce applications HADOOP_MAPRED_HOME should be added to the
         defaults. Left disabled here.
    <property>
        <name>yarn.nodemanager.env-whitelist</name>
        <value></value>
    </property>
    -->
    <property>
        <name>yarn.log-aggregation.retain-seconds</name>
        <value>-1</value>
    </property>
    <property>
        <name>yarn.log-aggregation.retain-check-interval-seconds</name>
        <value>-1</value>
    </property>
    <!-- Node health check: the NodeManager periodically runs an admin
         supplied script; if its output contains "ERROR" the node is
         reported unhealthy and blacklisted by the RM until it recovers. -->
    <property>
        <!-- NOTE(review): should point at the health script file itself,
             not a directory; confirm the actual script location -->
        <name>yarn.nodemanager.health-checker.script.path</name>
        <value>/opt/hadoop-3.3.2/scripts</value>
    </property>
    <property>
        <name>yarn.nodemanager.health-checker.script.opts</name>
        <value>check-health</value>
    </property>
    <property>
        <!-- the original repeated script.opts three times (later duplicates
             simply override earlier ones); these two entries were presumably
             meant to be the check interval and script timeout in ms -->
        <name>yarn.nodemanager.health-checker.interval-ms</name>
        <value>600000</value>
    </property>
    <property>
        <name>yarn.nodemanager.health-checker.script.timeout-ms</name>
        <value>1200000</value>
    </property>
    <!-- Optional YARN web proxy service; deliberately disabled.
    <property>
        <name>yarn.web-proxy.address</name>
        <value>192.168.103.232:9999</value>
    </property>
    -->
</configuration>

mapreduce文件

[root@d-01 hadoop]# egrep -v "^#|^$" /opt/hadoop-3.3.2/etc/hadoop/mapred-site.xml 
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
  Licensed under the Apache License, Version 2.0 (the "License");
  you may not use this file except in compliance with the License.
  You may obtain a copy of the License at
    http://www.apache.org/licenses/LICENSE-2.0
  Unless required by applicable law or agreed to in writing, software
  distributed under the License is distributed on an "AS IS" BASIS,
  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  See the License for the specific language governing permissions and
  limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
  <!--Configurations for MapReduce Applications-->
    <!-- NOTE: the original file spelled every tag <proprety>. Hadoop only
         reads <property> elements, so none of these settings took effect.
         All tags below are corrected. -->
    <property>
        <name>mapreduce.framework.name</name>
        <value>yarn</value>
    </property>
    <!-- On Hadoop 3.x the MR ApplicationMaster and tasks need
         HADOOP_MAPRED_HOME in their environment, otherwise jobs fail with
         "Could not find or load main class ...MRAppMaster". -->
    <property>
        <name>yarn.app.mapreduce.am.env</name>
        <value>HADOOP_MAPRED_HOME=/opt/hadoop-3.3.2</value>
    </property>
    <property>
        <name>mapreduce.map.env</name>
        <value>HADOOP_MAPRED_HOME=/opt/hadoop-3.3.2</value>
    </property>
    <property>
        <name>mapreduce.reduce.env</name>
        <value>HADOOP_MAPRED_HOME=/opt/hadoop-3.3.2</value>
    </property>
    <!-- Container memory for map tasks (MiB). -->
    <property>
        <name>mapreduce.map.memory.mb</name>
        <value>1536</value>
    </property>
    <!-- Larger heap-size for child jvms of maps. -->
    <property>
        <name>mapreduce.map.java.opts</name>
        <value>-Xmx1024M</value>
    </property>
    <!-- Container memory for reduce tasks (MiB). -->
    <property>
        <name>mapreduce.reduce.memory.mb</name>
        <value>3072</value>
    </property>
    <property>
        <name>mapreduce.reduce.java.opts</name>
        <value>-Xmx2560M</value>
    </property>
    <property>
        <name>mapreduce.task.io.sort.mb</name>
        <value>512</value>
    </property>
    <!-- More streams merged at once while sorting files. -->
    <property>
        <name>mapreduce.task.io.sort.factor</name>
        <value>100</value>
    </property>
    <!-- Higher number of parallel copies run by reduces to fetch outputs
         from a very large number of maps. -->
    <property>
        <name>mapreduce.reduce.shuffle.parallelcopies</name>
        <value>50</value>
    </property>
  <!--Configurations for MapReduce JobHistory Server-->
    <property>
        <!-- single server address; the original comma list of three hosts
             is not valid for this key -->
        <name>mapreduce.jobhistory.address</name>
        <value>d-01:10020</value>
    </property>
    <property>
        <!-- web UI port; default is 19888 (the original 10020 collides with
             the RPC port above) -->
        <name>mapreduce.jobhistory.webapp.address</name>
        <value>d-01:19888</value>
    </property>
    <!-- Directory where history files are written by MapReduce jobs. -->
    <property>
        <name>mapreduce.jobhistory.intermediate-done-dir</name>
        <value>/opt/hadoop-3.3.2/logs/mr-done</value>
    </property>
    <!-- Directory where history files are managed by the MR JobHistory Server. -->
    <property>
        <name>mapreduce.jobhistory.done-dir</name>
        <value>/opt/hadoop-3.3.2/logs/mr</value>
    </property>
</configuration>

workers

说明:这个配置可以不改,添加这个是为了能在d-01执行批量管理服务的脚本。

[root@d-01 ~]# egrep -v "^#|^$" /opt/hadoop-3.3.2/etc/hadoop/workers 
d-01
d-02
d-03

新建启动用户

所有节点都执行

useradd hdfs
useradd yarn

passwd hdfs
passwd yarn

授权

只在d-01执行

# ':' is the portable owner:group separator; the 'user.group' form is a
# deprecated GNU extension and is ambiguous for usernames containing dots.
chown -R hdfs:hdfs /opt/hadoop-3.3.2/

用户免密登录

hdfs和yarn用户免密登录（下面同时为两个用户配置免密）
只在d-01执行

[root@d-01 ~]# su - hdfs
[hdfs@d-01 ~]$ ssh-keygen 
[hdfs@d-01 ~]$ ssh-copy-id d-01
[hdfs@d-01 ~]$ ssh-copy-id d-02
[hdfs@d-01 ~]$ ssh-copy-id d-03
[root@d-01 ~]# su - yarn
[yarn@d-01 ~]$ ssh-keygen 
[yarn@d-01 ~]$ ssh-copy-id d-01
[yarn@d-01 ~]$ ssh-copy-id d-02
[yarn@d-01 ~]$ ssh-copy-id d-03

配置环境变量

HADOOP_HOME配置

所有主机都操作

echo -e "HADOOP_HOME=/opt/hadoop-3.3.2\nexport HADOOP_HOME" >> /etc/profile
source /etc/profile

在hadoop配置文件hadoop-env.sh中配置
所有主机都操作

[root@d-01 ~]# grep -i hadoop_home /opt/hadoop-3.3.2/etc/hadoop/hadoop-env.sh 
export HADOOP_HOME=/opt/hadoop-3.3.2

JAVA_HOME配置

注意:我是yum安装的java。
查看jdk的真实路径。

[root@d-01 ~]# which java
/usr/bin/java
[root@d-01 ~]# ll /usr/bin/|grep java
lrwxrwxrwx  1 root root        26 Apr 24 10:55 alt-java -> /etc/alternatives/alt-java
lrwxrwxrwx  1 root root        22 Apr 24 10:55 java -> /etc/alternatives/java
lrwxrwxrwx  1 root root        23 Apr 24 10:55 javac -> /etc/alternatives/javac
lrwxrwxrwx  1 root root        25 Apr 24 10:55 javadoc -> /etc/alternatives/javadoc
lrwxrwxrwx  1 root root        23 Apr 24 10:55 javah -> /etc/alternatives/javah
lrwxrwxrwx  1 root root        23 Apr 24 10:55 javap -> /etc/alternatives/javap
           
[root@d-01 ~]# ll /etc/alternatives/java
lrwxrwxrwx 1 root root 73 Apr 24 10:55 /etc/alternatives/java -> /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.322.b06-1.el7_9.x86_64/jre/bin/java
[root@d-01 ~]# 

JAVA_HOME为/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.322.b06-1.el7_9.x86_64/
在hadoop配置文件hadoop-env.sh中配置
所有主机都操作

[root@d-01 ~]# grep -i java_home /opt/hadoop-3.3.2/etc/hadoop/hadoop-env.sh 
#  JAVA_HOME=/usr/java/testing hdfs dfs -ls
# Technically, the only required environment variable is JAVA_HOME.
export JAVA_HOME=/usr/lib/jvm/java-1.8.0-openjdk-1.8.0.322.b06-1.el7_9.x86_64/

hadoop shell环境变量配置

说明:可以不配置,配置后可以执行命令的时候不用输入命令全路径。
所有主机都操作

echo "export PATH=$PATH:/opt/hadoop-3.3.2/sbin/:/opt/hadoop-3.3.2/bin/" >> /etc/profile
source /etc/profile

复制文件到另外节点

只在d-01执行

[root@d-01 ~]# scp -rp /opt/hadoop-3.3.2/ d-02:/opt/
[root@d-01 ~]# scp -rp /opt/hadoop-3.3.2/ d-03:/opt/

授权
在d-02和d-03分别执行

[root@d-02 ~]# chown -R hdfs.hdfs /opt/hadoop-3.3.2/
[root@d-03 ~]# chown -R hdfs.hdfs /opt/hadoop-3.3.2/

启动进程

启动进程可以用批量脚本也可以单个执行,本文演示用批量脚本启动(前提是执行脚本的主机配置好workers文件和相关用户的免密登录)

格式化namenode

注意:切换到hdfs用户执行。

[root@d-01 ~]# su - hdfs
[hdfs@d-01 ~]$ hdfs namenode -format

输出

WARNING: /opt/hadoop-3.3.2/logs does not exist. Creating.
2022-04-26 12:53:49,112 INFO namenode.NameNode: STARTUP_MSG: 
/************************************************************
STARTUP_MSG: Starting NameNode
STARTUP_MSG:   host = d-01/192.168.103.232
STARTUP_MSG:   args = [-format]
STARTUP_MSG:   version = 3.3.2
STARTUP_MSG:   classpath = /opt/hadoop-3.3.2/etc/hadoop:/opt/hadoop-3.3.2/share/hadoop/common/lib/accessors-smart-2.4.7.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/animal-sniffer-annotations-1.17.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/asm-5.0.4.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/audience-annotations-0.5.0.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/avro-1.7.7.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/checker-qual-2.5.2.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/commons-beanutils-1.9.4.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/commons-cli-1.2.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/commons-codec-1.11.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/commons-collections-3.2.2.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/commons-compress-1.21.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/commons-configuration2-2.1.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/commons-daemon-1.0.13.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/commons-io-2.8.0.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/commons-lang3-3.12.0.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/commons-logging-1.1.3.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/commons-math3-3.1.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/commons-net-3.6.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/commons-text-1.4.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/curator-client-4.2.0.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/curator-framework-4.2.0.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/curator-recipes-4.2.0.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/dnsjava-2.1.7.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/failureaccess-1.0.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/gson-2.8.9.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/guava-27.0-jre.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/hadoop-annotations-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/hadoop-auth-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/hadoop-shaded-guava-1.1.1.jar:/opt/hadoop-3.3.2/share/
hadoop/common/lib/hadoop-shaded-protobuf_3_7-1.1.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/httpclient-4.5.13.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/httpcore-4.4.13.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/j2objc-annotations-1.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jackson-annotations-2.13.0.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jackson-core-2.13.0.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jackson-core-asl-1.9.13.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jackson-databind-2.13.0.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jackson-jaxrs-1.9.13.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jackson-mapper-asl-1.9.13.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jackson-xc-1.9.13.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jakarta.activation-api-1.2.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/javax.servlet-api-3.1.0.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jaxb-api-2.2.11.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jaxb-impl-2.2.3-1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jcip-annotations-1.0-1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jersey-core-1.19.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jersey-json-1.19.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jersey-server-1.19.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jersey-servlet-1.19.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jettison-1.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jetty-http-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jetty-io-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jetty-security-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jetty-server-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jetty-servlet-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jetty-util-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jetty-util-ajax-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jetty-webapp-9.4.43.v20210629.jar:/opt/h
adoop-3.3.2/share/hadoop/common/lib/jetty-xml-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jsch-0.1.55.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/json-smart-2.4.7.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jsp-api-2.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jsr305-3.0.2.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jsr311-api-1.1.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/jul-to-slf4j-1.7.30.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/kerb-admin-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/kerb-client-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/kerb-common-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/kerb-core-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/kerb-crypto-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/kerb-identity-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/kerb-server-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/kerb-simplekdc-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/kerb-util-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/kerby-asn1-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/kerby-config-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/kerby-pkix-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/kerby-util-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/kerby-xdr-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/log4j-1.2.17.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/metrics-core-3.2.4.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/netty-3.10.6.Final.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/nimbus-jose-jwt-9.8.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/paranamer-2.3.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/protobuf-java-2.5.0.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/re2j-1.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/slf4j-api-1.7.30.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/slf4j-log4j12-1.7.30.jar:/
opt/hadoop-3.3.2/share/hadoop/common/lib/snappy-java-1.1.8.2.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/stax2-api-4.2.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/token-provider-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/woodstox-core-5.3.0.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/zookeeper-3.5.6.jar:/opt/hadoop-3.3.2/share/hadoop/common/lib/zookeeper-jute-3.5.6.jar:/opt/hadoop-3.3.2/share/hadoop/common/hadoop-common-3.3.2-tests.jar:/opt/hadoop-3.3.2/share/hadoop/common/hadoop-common-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/common/hadoop-kms-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/common/hadoop-nfs-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/common/hadoop-registry-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/accessors-smart-2.4.7.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/animal-sniffer-annotations-1.17.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/asm-5.0.4.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/audience-annotations-0.5.0.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/avro-1.7.7.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/checker-qual-2.5.2.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/commons-beanutils-1.9.4.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/commons-cli-1.2.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/commons-codec-1.11.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/commons-collections-3.2.2.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/commons-compress-1.21.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/commons-configuration2-2.1.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/commons-daemon-1.0.13.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/commons-io-2.8.0.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/commons-lang3-3.12.0.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/commons-logging-1.1.3.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/commons-math3-3.1.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/commons-net-3.6.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/commons-text-1.4.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs
/lib/curator-client-4.2.0.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/curator-framework-4.2.0.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/curator-recipes-4.2.0.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/dnsjava-2.1.7.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/failureaccess-1.0.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/gson-2.8.9.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/guava-27.0-jre.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/hadoop-annotations-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/hadoop-auth-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/hadoop-shaded-guava-1.1.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/hadoop-shaded-protobuf_3_7-1.1.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/httpclient-4.5.13.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/httpcore-4.4.13.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/j2objc-annotations-1.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jackson-annotations-2.13.0.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jackson-core-2.13.0.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jackson-core-asl-1.9.13.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jackson-databind-2.13.0.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jackson-jaxrs-1.9.13.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jackson-mapper-asl-1.9.13.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jackson-xc-1.9.13.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jakarta.activation-api-1.2.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/javax.servlet-api-3.1.0.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jaxb-api-2.2.11.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jaxb-impl-2.2.3-1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jcip-annotations-1.0-1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jersey-core-1.19.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jersey-json-1.19.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jersey-server-1.19.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jersey-servlet-1.19.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jettison-1.1.jar:/opt/hadoop-3.3.2/share/hadoop/
hdfs/lib/jetty-http-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jetty-io-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jetty-security-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jetty-server-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jetty-servlet-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jetty-util-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jetty-util-ajax-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jetty-webapp-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jetty-xml-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jsch-0.1.55.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/json-simple-1.1.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/json-smart-2.4.7.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jsr305-3.0.2.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/jsr311-api-1.1.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/kerb-admin-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/kerb-client-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/kerb-common-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/kerb-core-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/kerb-crypto-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/kerb-identity-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/kerb-server-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/kerb-simplekdc-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/kerb-util-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/kerby-asn1-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/kerby-config-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/kerby-pkix-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/kerby-util-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/kerby-xdr-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/leveldbjni-all-1.8.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/listenablefuture-9999.0-empty-to-avoid-conflict-with-guava.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/log
4j-1.2.17.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/netty-3.10.6.Final.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/netty-all-4.1.68.Final.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/nimbus-jose-jwt-9.8.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/okhttp-2.7.5.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/okio-1.6.0.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/paranamer-2.3.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/protobuf-java-2.5.0.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/re2j-1.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/snappy-java-1.1.8.2.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/stax2-api-4.2.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/token-provider-1.0.1.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/woodstox-core-5.3.0.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/zookeeper-3.5.6.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/lib/zookeeper-jute-3.5.6.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/hadoop-hdfs-3.3.2-tests.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/hadoop-hdfs-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/hadoop-hdfs-client-3.3.2-tests.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/hadoop-hdfs-client-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/hadoop-hdfs-httpfs-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/hadoop-hdfs-native-client-3.3.2-tests.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/hadoop-hdfs-native-client-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/hadoop-hdfs-nfs-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/hadoop-hdfs-rbf-3.3.2-tests.jar:/opt/hadoop-3.3.2/share/hadoop/hdfs/hadoop-hdfs-rbf-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/mapreduce/hadoop-mapreduce-client-app-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/mapreduce/hadoop-mapreduce-client-common-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/mapreduce/hadoop-mapreduce-client-core-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/mapreduce/hadoop-mapreduce-client-hs-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/mapreduce/hadoop-mapreduce-client-hs-plugins-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/mapreduce/hadoop-mapredu
ce-client-jobclient-3.3.2-tests.jar:/opt/hadoop-3.3.2/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/mapreduce/hadoop-mapreduce-client-nativetask-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/mapreduce/hadoop-mapreduce-client-shuffle-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/mapreduce/hadoop-mapreduce-client-uploader-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/yarn:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/HikariCP-java7-2.4.12.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/aopalliance-1.0.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/asm-analysis-9.1.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/asm-commons-9.1.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/asm-tree-9.1.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/bcpkix-jdk15on-1.60.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/bcprov-jdk15on-1.60.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/ehcache-3.3.1.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/fst-2.50.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/geronimo-jcache_1.0_spec-1.0-alpha-1.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/guice-4.0.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/guice-servlet-4.0.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/jackson-jaxrs-base-2.13.0.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/jackson-jaxrs-json-provider-2.13.0.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/jackson-module-jaxb-annotations-2.13.0.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/jakarta.xml.bind-api-2.3.3.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/java-util-1.9.0.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/javax-websocket-client-impl-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/javax-websocket-server-impl-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/javax.inject-1.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/javax.websocket-api-1.0.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/javax.websocket-client-api-1.0.jar:/opt/hadoop-3.3.2/share/hadoop
/yarn/lib/javax.ws.rs-api-2.1.1.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/jersey-client-1.19.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/jersey-guice-1.19.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/jetty-annotations-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/jetty-client-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/jetty-jndi-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/jetty-plus-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/jline-3.9.0.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/jna-5.2.0.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/json-io-2.5.1.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/metrics-core-3.2.4.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/mssql-jdbc-6.2.1.jre7.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/objenesis-2.6.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/snakeyaml-1.26.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/swagger-annotations-1.5.4.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/websocket-api-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/websocket-client-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/websocket-common-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/websocket-server-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/lib/websocket-servlet-9.4.43.v20210629.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/hadoop-yarn-api-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/hadoop-yarn-applications-distributedshell-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/hadoop-yarn-applications-mawo-core-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/hadoop-yarn-applications-unmanaged-am-launcher-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/hadoop-yarn-client-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/hadoop-yarn-common-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/hadoop-yarn-registry-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/hadoop-yarn-server-applicationhistoryservice-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/hadoop-yarn-server-com
mon-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/hadoop-yarn-server-nodemanager-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/hadoop-yarn-server-resourcemanager-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/hadoop-yarn-server-router-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/hadoop-yarn-server-sharedcachemanager-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/hadoop-yarn-server-tests-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/hadoop-yarn-server-timeline-pluginstorage-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/hadoop-yarn-server-web-proxy-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/hadoop-yarn-services-api-3.3.2.jar:/opt/hadoop-3.3.2/share/hadoop/yarn/hadoop-yarn-services-core-3.3.2.jar
STARTUP_MSG:   build = git@github.com:apache/hadoop.git -r 0bcb014209e219273cb6fd4152df7df713cbac61; compiled by 'chao' on 2022-02-21T18:39Z
STARTUP_MSG:   java = 1.8.0_322
************************************************************/
2022-04-26 12:53:49,124 INFO namenode.NameNode: registered UNIX signal handlers for [TERM, HUP, INT]
2022-04-26 12:53:49,251 INFO namenode.NameNode: createNameNode [-format]
2022-04-26 12:53:49,821 INFO namenode.NameNode: Formatting using clusterid: CID-8527e2e8-35a7-4d99-9b8a-8ba27b3d13be
2022-04-26 12:53:49,869 INFO namenode.FSEditLog: Edit logging is async:true
2022-04-26 12:53:49,910 INFO namenode.FSNamesystem: KeyProvider: null
2022-04-26 12:53:49,912 INFO namenode.FSNamesystem: fsLock is fair: true
2022-04-26 12:53:49,913 INFO namenode.FSNamesystem: Detailed lock hold time metrics enabled: false
2022-04-26 12:53:49,927 INFO namenode.FSNamesystem: fsOwner                = hdfs (auth:SIMPLE)
2022-04-26 12:53:49,927 INFO namenode.FSNamesystem: supergroup             = supergroup
2022-04-26 12:53:49,927 INFO namenode.FSNamesystem: isPermissionEnabled    = true
2022-04-26 12:53:49,927 INFO namenode.FSNamesystem: isStoragePolicyEnabled = true
2022-04-26 12:53:49,927 INFO namenode.FSNamesystem: HA Enabled: false
2022-04-26 12:53:49,997 INFO common.Util: dfs.datanode.fileio.profiling.sampling.percentage set to 0. Disabling file IO profiling
2022-04-26 12:53:50,015 INFO blockmanagement.DatanodeManager: dfs.block.invalidate.limit: configured=1000, counted=60, effected=1000
2022-04-26 12:53:50,015 INFO blockmanagement.DatanodeManager: dfs.namenode.datanode.registration.ip-hostname-check=true
2022-04-26 12:53:50,022 INFO blockmanagement.BlockManager: dfs.namenode.startup.delay.block.deletion.sec is set to 000:00:00:00.000
2022-04-26 12:53:50,023 INFO blockmanagement.BlockManager: The block deletion will start around 2022 Apr 26 12:53:50
2022-04-26 12:53:50,025 INFO util.GSet: Computing capacity for map BlocksMap
2022-04-26 12:53:50,025 INFO util.GSet: VM type       = 64-bit
2022-04-26 12:53:50,027 INFO util.GSet: 2.0% max memory 491 MB = 9.8 MB
2022-04-26 12:53:50,027 INFO util.GSet: capacity      = 2^20 = 1048576 entries
2022-04-26 12:53:50,044 INFO blockmanagement.BlockManager: Storage policy satisfier is disabled
2022-04-26 12:53:50,044 INFO blockmanagement.BlockManager: dfs.block.access.token.enable = false
2022-04-26 12:53:50,054 INFO blockmanagement.BlockManagerSafeMode: dfs.namenode.safemode.threshold-pct = 0.999
2022-04-26 12:53:50,054 INFO blockmanagement.BlockManagerSafeMode: dfs.namenode.safemode.min.datanodes = 0
2022-04-26 12:53:50,054 INFO blockmanagement.BlockManagerSafeMode: dfs.namenode.safemode.extension = 30000
2022-04-26 12:53:50,055 INFO blockmanagement.BlockManager: defaultReplication         = 3
2022-04-26 12:53:50,055 INFO blockmanagement.BlockManager: maxReplication             = 512
2022-04-26 12:53:50,055 INFO blockmanagement.BlockManager: minReplication             = 1
2022-04-26 12:53:50,055 INFO blockmanagement.BlockManager: maxReplicationStreams      = 2
2022-04-26 12:53:50,055 INFO blockmanagement.BlockManager: redundancyRecheckInterval  = 3000ms
2022-04-26 12:53:50,055 INFO blockmanagement.BlockManager: encryptDataTransfer        = false
2022-04-26 12:53:50,055 INFO blockmanagement.BlockManager: maxNumBlocksToLog          = 1000
2022-04-26 12:53:50,094 INFO namenode.FSDirectory: GLOBAL serial map: bits=29 maxEntries=536870911
2022-04-26 12:53:50,094 INFO namenode.FSDirectory: USER serial map: bits=24 maxEntries=16777215
2022-04-26 12:53:50,094 INFO namenode.FSDirectory: GROUP serial map: bits=24 maxEntries=16777215
2022-04-26 12:53:50,094 INFO namenode.FSDirectory: XATTR serial map: bits=24 maxEntries=16777215
2022-04-26 12:53:50,114 INFO util.GSet: Computing capacity for map INodeMap
2022-04-26 12:53:50,114 INFO util.GSet: VM type       = 64-bit
2022-04-26 12:53:50,115 INFO util.GSet: 1.0% max memory 491 MB = 4.9 MB
2022-04-26 12:53:50,115 INFO util.GSet: capacity      = 2^19 = 524288 entries
2022-04-26 12:53:50,118 INFO namenode.FSDirectory: ACLs enabled? true
2022-04-26 12:53:50,118 INFO namenode.FSDirectory: POSIX ACL inheritance enabled? true
2022-04-26 12:53:50,119 INFO namenode.FSDirectory: XAttrs enabled? true
2022-04-26 12:53:50,119 INFO namenode.NameNode: Caching file names occurring more than 10 times
2022-04-26 12:53:50,130 INFO snapshot.SnapshotManager: Loaded config captureOpenFiles: false, skipCaptureAccessTimeOnlyChange: false, snapshotDiffAllowSnapRootDescendant: true, maxSnapshotLimit: 65536
2022-04-26 12:53:50,133 INFO snapshot.SnapshotManager: SkipList is disabled
2022-04-26 12:53:50,141 INFO util.GSet: Computing capacity for map cachedBlocks
2022-04-26 12:53:50,141 INFO util.GSet: VM type       = 64-bit
2022-04-26 12:53:50,141 INFO util.GSet: 0.25% max memory 491 MB = 1.2 MB
2022-04-26 12:53:50,141 INFO util.GSet: capacity      = 2^17 = 131072 entries
2022-04-26 12:53:50,194 INFO metrics.TopMetrics: NNTop conf: dfs.namenode.top.window.num.buckets = 10
2022-04-26 12:53:50,194 INFO metrics.TopMetrics: NNTop conf: dfs.namenode.top.num.users = 10
2022-04-26 12:53:50,194 INFO metrics.TopMetrics: NNTop conf: dfs.namenode.top.windows.minutes = 1,5,25
2022-04-26 12:53:50,199 INFO namenode.FSNamesystem: Retry cache on namenode is enabled
2022-04-26 12:53:50,199 INFO namenode.FSNamesystem: Retry cache will use 0.03 of total heap and retry cache entry expiry time is 600000 millis
2022-04-26 12:53:50,202 INFO util.GSet: Computing capacity for map NameNodeRetryCache
2022-04-26 12:53:50,202 INFO util.GSet: VM type       = 64-bit
2022-04-26 12:53:50,202 INFO util.GSet: 0.029999999329447746% max memory 491 MB = 150.8 KB
2022-04-26 12:53:50,202 INFO util.GSet: capacity      = 2^14 = 16384 entries
2022-04-26 12:53:50,231 INFO namenode.FSImage: Allocated new BlockPoolId: BP-1826930779-192.168.103.232-1650948830222
2022-04-26 12:53:50,251 INFO common.Storage: Storage directory /tmp/hadoop-hdfs/dfs/name has been successfully formatted.
2022-04-26 12:53:50,292 INFO namenode.FSImageFormatProtobuf: Saving image file /tmp/hadoop-hdfs/dfs/name/current/fsimage.ckpt_0000000000000000000 using no compression
2022-04-26 12:53:50,395 INFO namenode.FSImageFormatProtobuf: Image file /tmp/hadoop-hdfs/dfs/name/current/fsimage.ckpt_0000000000000000000 of size 399 bytes saved in 0 seconds .
2022-04-26 12:53:50,410 INFO namenode.NNStorageRetentionManager: Going to retain 1 images with txid >= 0
2022-04-26 12:53:50,424 INFO namenode.FSNamesystem: Stopping services started for active state
2022-04-26 12:53:50,424 INFO namenode.FSNamesystem: Stopping services started for standby state
2022-04-26 12:53:50,429 INFO namenode.FSImage: FSImageSaver clean checkpoint: txid=0 when meet shutdown.
2022-04-26 12:53:50,429 INFO namenode.NameNode: SHUTDOWN_MSG: 
/************************************************************
SHUTDOWN_MSG: Shutting down NameNode at d-01/192.168.103.232
************************************************************/

启动hdfs相关进程

单个启动命令如下,本文未采用这种方式启动hdfs相关进程(在哪台机器执行,那台机器就启动对应的进程)

hdfs --daemon start namenode
hdfs --daemon start datanode

批量启动脚本方式启动dfs相关进程
注意:切换到hdfs用户执行。
只在d-01执行

[root@d-01 ~]# su - hdfs
[hdfs@d-01 ~]$ start-dfs.sh

验证进程是否启动
所有节点都执行如下命令

jps

d-01输出

[hdfs@d-01 ~]$ jps
5106 Jps
4724 DataNode
4917 SecondaryNameNode
4604 NameNode

d-02输出

[root@d-02 ~]# jps
2931 DataNode
2968 Jps

d-03输出

[root@d-03 ~]# jps
2441 Jps
2399 DataNode

浏览器输入:d-01:9870
在这里插入图片描述
上传文件测试
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述
在这里插入图片描述

启动yarn进程

更改logs目录权限为777。
所有节点都执行

chmod 777 /opt/hadoop-3.3.2/logs/

单个启动命令如下,本文未采用这种方式启动yarn相关进程(在哪台机器执行,那台机器就启动对应的进程)

yarn --daemon start resourcemanager
yarn --daemon start nodemanager
yarn --daemon start proxyserver

批量启动脚本方式启动yarn相关进程
注意:切换到yarn用户执行。
只在d-01执行

[root@d-01 ~]# su - yarn
[yarn@d-01 ~]$ start-yarn.sh 
输出
Starting resourcemanager
Starting nodemanagers

验证进程是否启动
所有节点都执行如下命令

jps

d-01输出

[yarn@d-01 ~]$ jps
5315 ResourceManager
5763 Jps
5430 NodeManager

d-02输出

[yarn@d-02 ~]$ jps
3176 Jps
3067 NodeManager

d-03输出

[yarn@d-03 ~]$ jps
2848 Jps
2739 NodeManager

启动mapred进程

在哪个机器执行命令就在那个机器启动该进程。
在d-01启动mapred进程

[root@d-01 ~]# useradd mapred
[root@d-01 ~]# passwd mapred
[root@d-01 ~]# su mapred

执行启动命令

[mapred@d-01 ~]$ mapred --daemon start historyserver

查看进程未启动

[mapred@d-01 ~]$ jps 
6538 Jps

查看启动输出

[mapred@d-01 ~]$ cat /opt/hadoop-3.3.2/logs/hadoop-mapred-historyserver-d-01.out
OpenJDK 64-Bit Server VM warning: INFO: os::commit_memory(0x00000000e0000000, 358088704, 0) failed; error='Cannot allocate memory' (errno=12)
#
# There is insufficient memory for the Java Runtime Environment to continue.
# Native memory allocation (mmap) failed to map 358088704 bytes for committing reserved memory.
# An error report file with more information is saved as:
# /tmp/hsperfdata_mapred/hs_err_pid5841.log
core file size          (blocks, -c) 0
data seg size           (kbytes, -d) unlimited
scheduling priority             (-e) 0
file size               (blocks, -f) unlimited
pending signals                 (-i) 7190
max locked memory       (kbytes, -l) 64
max memory size         (kbytes, -m) unlimited
open files                      (-n) 1024
pipe size            (512 bytes, -p) 8
POSIX message queues     (bytes, -q) 819200
real-time priority              (-r) 0
stack size              (kbytes, -s) 8192
cpu time               (seconds, -t) unlimited
max user processes              (-u) 4096
virtual memory          (kbytes, -v) unlimited
file locks                      (-x) unlimited

由输出可以看出是内存问题。

增加swap空间

[root@d-01 ~]# free -h
              total        used        free      shared  buff/cache   available
Mem:           3.7G        1.6G        1.2G         11M        910M        1.9G
Swap:            0B          0B          0B
[root@d-01 ~]# dd if=/dev/zero of=swapfile bs=1024 count=500000
500000+0 records in
500000+0 records out
512000000 bytes (512 MB) copied, 3.46032 s, 148 MB/s
[root@d-01 ~]# mkswap swapfile
Setting up swapspace version 1, size = 499996 KiB
no label, UUID=851a8136-d033-4913-829b-f67c036be6e5
[root@d-01 ~]# swapon swapfile
swapon: /root/swapfile: insecure permissions 0644, 0600 suggested.
[root@d-01 ~]# free -h
              total        used        free      shared  buff/cache   available
Mem:           3.7G        1.6G        754M         11M        1.4G        1.9G
Swap:          488M          0B        488M

再次执行启动命令

[mapred@d-01 ~]$ mapred --daemon start historyserver

查看进程未启动

[mapred@d-01 ~]$ jps 
6538 Jps

查看日志

[mapred@d-01 root]$ cat /opt/hadoop-3.3.2/logs/hadoop-mapred-historyserver-d-01.log 
省略部分内容
Caused by: org.apache.hadoop.ipc.RemoteException(org.apache.hadoop.security.AccessControlException): Permission denied: user=mapred, access=WRITE, inode="/":hdfs:supergroup:drwxr-xr-x
省略部分内容

由输出可以看出是mapred用户没有写hdfs文件系统/目录权限问题。
增加hdfs文件系统/目录w权限

[root@d-01 ~]# su - hdfs
Last login: Tue Apr 26 13:04:23 CST 2022 on pts/0
[hdfs@d-01 ~]$ hdfs dfs -chmod -R +w  /
[hdfs@d-01 ~]$ hdfs dfs -ls -d /
drwxrwxrwx   - hdfs supergroup          0 2022-04-26 13:09 /

再次执行启动命令

[mapred@d-01 ~]$ mapred --daemon start historyserver

验证进程是否启动

[mapred@d-01 ~]$ jps
6437 Jps
6408 JobHistoryServer

报错及解决

命令行查看hdfs文件系统命令无法执行

报错如下,执行任务hdfs dfs命令都报错

hdfs dfs -ls /
2022-12-09 23:34:51,951 WARN fs.FileSystem: Failed to initialize fileystem hdfs://mycluster: java.io.IOException: Couldn't create proxy provider null
ls: Couldn't create proxy provider null

解决:
编辑hdfs-site.xml配置文件

  <property>
        <name>dfs.client.failover.proxy.provider.mycluster</name>
        <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
    </property>

无需重启服务。

web界面传文件报错1

在这里插入图片描述

在这里插入图片描述
在这里插入图片描述
解决:
1、修改配置文件。
2、在windows本地hosts中加主机名解析。

web界面传文件报错2

在这里插入图片描述
原因:datanode没启动

[hdfs@d-03 ~]$ jps
21014 Jps
[hdfs@d-02 ~]$ jps
20036 Jps
[hdfs@d-01 ~]$ jps
58263 SecondaryNameNode
57980 NameNode
59295 Jps

查看datanode日志

2022-04-26 10:18:54,234 WARN org.apache.hadoop.hdfs.server.common.Storage: Failed to add storage directory [DISK]file:/tmp/hadoop-hdfs/dfs/data
java.io.IOException: Incompatible clusterIDs in /tmp/hadoop-hdfs/dfs/data: namenode clusterID = CID-6ab14375-2908-4d6e-bb62-564b2a0d93bf; datanode clusterID = CID-70ca32df-4979-4202-b220-3449da0c4702
	at org.apache.hadoop.hdfs.server.datanode.DataStorage.doTransition(DataStorage.java:746)
	at org.apache.hadoop.hdfs.server.datanode.DataStorage.loadStorageDirectory(DataStorage.java:296)
	at org.apache.hadoop.hdfs.server.datanode.DataStorage.loadDataStorage(DataStorage.java:409)
	at org.apache.hadoop.hdfs.server.datanode.DataStorage.addStorageLocations(DataStorage.java:389)
	at org.apache.hadoop.hdfs.server.datanode.DataStorage.recoverTransitionRead(DataStorage.java:561)
	at org.apache.hadoop.hdfs.server.datanode.DataNode.initStorage(DataNode.java:1739)
	at org.apache.hadoop.hdfs.server.datanode.DataNode.initBlockPool(DataNode.java:1675)
	at org.apache.hadoop.hdfs.server.datanode.BPOfferService.verifyAndSetNamespaceInfo(BPOfferService.java:394)
	at org.apache.hadoop.hdfs.server.datanode.BPServiceActor.connectToNNAndHandshake(BPServiceActor.java:295)
	at org.apache.hadoop.hdfs.server.datanode.BPServiceActor.run(BPServiceActor.java:854)
	at java.lang.Thread.run(Thread.java:750)
2022-04-26 10:18:54,240 ERROR org.apache.hadoop.hdfs.server.datanode.DataNode: Initialization failed for Block pool <registering> (Datanode Uuid 9f96ca23-da3e-4619-ada6-0155afe1edff) service to d-01/192.168.103.232:9000. Exiting. 
java.io.IOException: All specified directories have failed to load.
	at org.apache.hadoop.hdfs.server.datanode.DataStorage.recoverTransitionRead(DataStorage.java:562)
	at org.apache.hadoop.hdfs.server.datanode.DataNode.initStorage(DataNode.java:1739)
	at org.apache.hadoop.hdfs.server.datanode.DataNode.initBlockPool(DataNode.java:1675)
	at org.apache.hadoop.hdfs.server.datanode.BPOfferService.verifyAndSetNamespaceInfo(BPOfferService.java:394)
	at org.apache.hadoop.hdfs.server.datanode.BPServiceActor.connectToNNAndHandshake(BPServiceActor.java:295)
	at org.apache.hadoop.hdfs.server.datanode.BPServiceActor.run(BPServiceActor.java:854)
	at java.lang.Thread.run(Thread.java:750)
2022-04-26 10:18:54,240 WARN org.apache.hadoop.hdfs.server.datanode.DataNode: Ending block pool service for: Block pool <registering> (Datanode Uuid 9f96ca23-da3e-4619-ada6-0155afe1edff) service to d-01/192.168.103.232:9000

注意到日志中有这么一段Incompatible clusterIDs in /tmp/hadoop-hdfs/dfs/data
这是因为我配置了namenode的高可用模式,并启动了,现在切回到了单namenode模式导致的。
解决:到每个节点执行如下命令删除/tmp/hadoop-hdfs/dfs/data目录。

rm /tmp/hadoop-hdfs/dfs/data -rf

proxyserver进程没启动

暂时没解决。

2022-04-24 18:48:47,598 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: Got finalize command for block pool BP-379694539-192.168.103.232-1650785972850
2022-04-24 18:49:35,435 ERROR org.apache.hadoop.hdfs.server.datanode.DataNode: RECEIVED SIGNAL 15: SIGTERM
2022-04-24 18:49:35,439 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: SHUTDOWN_MSG: 
/************************************************************
SHUTDOWN_MSG: Shutting down DataNode at d-01/192.168.103.232
************************************************************/
2022-04-24 18:50:24,238 INFO org.apache.hadoop.hdfs.server.datanode.DataNode: STARTUP_MSG: 
[yarn@d-01 ~]$ /opt/hadoop-3.3.2/bin/hadoop dfsadmin -refreshNodes
WARNING: Use of this script to execute dfsadmin is deprecated.
WARNING: Attempting to execute replacement "hdfs dfsadmin" instead.

OpenJDK 64-Bit Server VM warning: INFO: os::commit_memory(0x00000000e0000000, 358088704, 0) failed; error='Cannot allocate memory' (errno=12)
#
# There is insufficient memory for the Java Runtime Environment to continue.
# Native memory allocation (mmap) failed to map 358088704 bytes for committing reserved memory.
# An error report file with more information is saved as:
# /home/yarn/hs_err_pid20267.log
OpenJDK 64-Bit Server VM warning: INFO: os::commit_memory(0x00000000e0000000, 358088704, 0) failed; error='Cannot allocate memory' (errno=12)
# An error report file with more information is saved as:

proxyweb启动不了

[yarn@d-01 ~]$ /opt/hadoop-3.3.2/sbin/start-yarn.sh 
Starting resourcemanager
Starting nodemanagers
[yarn@d-01 ~]$ jps
5280 Jps
4869 ResourceManager
4990 NodeManager
[yarn@d-01 ~]$ cat /opt/hadoop-3.3.2/logs/hadoop-yarn-proxyserver-d-01.out 
OpenJDK 64-Bit Server VM warning: INFO: os::commit_memory(0x00000000e0000000, 358088704, 0) failed; error='Cannot allocate memory' (errno=12)
#
# There is insufficient memory for the Java Runtime Environment to continue.
# Native memory allocation (mmap) failed to map 358088704 bytes for committing reserved memory.
# An error report file with more information is saved as:
# /home/yarn/hs_err_pid13545.log
core file size          (blocks, -c) 0
data seg size           (kbytes, -d) unlimited
scheduling priority             (-e) 0
file size               (blocks, -f) unlimited
pending signals                 (-i) 7190
max locked memory       (kbytes, -l) 64
max memory size         (kbytes, -m) unlimited
open files                      (-n) 1024
pipe size            (512 bytes, -p) 8
POSIX message queues     (bytes, -q) 819200
real-time priority              (-r) 0
stack size              (kbytes, -s) 8192
cpu time               (seconds, -t) unlimited
max user processes              (-u) 4096
virtual memory          (kbytes, -v) unlimited
file locks                      (-x) unlimited

增加内存后依然报错

[root@d-01 ~]# cat /opt/hadoop-3.3.2/etc/hadoop/yarn-site.xml 
<proprety>
    <name>yarn.scheduler.minimum-allocation-mb</name>
    <value>2g</value>
</proprety>
<proprety>
    <name>yarn.scheduler.maximum-allocation-mb</name>
    <value>2g</value>
</proprety>
<proprety>
    <name>yarn.resourcemanager.nodes.include-path</name>
    <value>/opt/hadoop-3.3.2/resource</value>
</proprety>
    <!-- Configurations for NodeManager -->
<proprety>
    <name>yarn.nodemanager.resource.memory-mb</name>
    <value>512m</value>
</proprety>

注意:上面的配置存在两处错误——标签<proprety>拼写有误(应为<property>);以-mb结尾的参数其值应为以MB为单位的整数(如2048、512),不能写成2g、512m。这也是“增加内存后依然报错”的原因之一。

批量脚本执行前提

没做免密登录就不能使用批量启动脚本,否则会报错,报错如下
说明:d-03节点没做到其他主机的免密登录。

[hdfs@d-03 ~]$ /opt/hadoop-3.3.2/sbin/start-dfs.sh 
Starting namenodes on [d-01]
d-01: Permission denied (publickey,gssapi-keyex,gssapi-with-mic,password).
Starting datanodes
localhost: Permission denied (publickey,gssapi-keyex,gssapi-with-mic,password).
Starting secondary namenodes [d-03]
d-03: Permission denied (publickey,gssapi-keyex,gssapi-with-mic,password).

搭建手动切换高可用集群

说明:本文将单点namenode切换为namenode高可用,且不破坏集群状态。
高可用集群有两种实现方式:NFS和QJM,详情见官网

https://hadoop.apache.org/docs/r3.3.2/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithNFS.html
https://hadoop.apache.org/docs/r3.3.2/hadoop-project-dist/hadoop-hdfs/HDFSHighAvailabilityWithQJM.html

官网QJM重要摘录

Similar to Federation configuration, HA configuration is backward compatible and allows existing single NameNode configurations to work without change. The new configuration is designed such that all the nodes in the cluster may have the same configuration without the need for deploying different configuration files to different machines based on the type of the node.

与Federation配置类似,HA配置是向后兼容的,允许现有的单个NameNode配置在不更改的情况下工作。 新配置的设计使集群中的所有节点都可以具有相同的配置,而不需要根据节点的类型将不同的配置文件部署到不同的机器上。

Like HDFS Federation, HA clusters reuse the nameservice ID to identify a single HDFS instance that may in fact consist of multiple HA NameNodes. In addition, a new abstraction called NameNode ID is added with HA. Each distinct NameNode in the cluster has a different NameNode ID to distinguish it. To support a single configuration file for all of the NameNodes, the relevant configuration parameters are suffixed with the nameservice ID as well as the NameNode ID.

像HDFS Federation一样,HA集群重用nameservice ID来识别一个HDFS实例,而这个HDFS实例实际上可能包含多个HA namenode。 此外,HA还添加了一个名为NameNode ID的新抽象。 集群中每个不同的NameNode都有一个不同的NameNode ID来区分。 为了支持所有NameNode的单个配置文件,相关的配置参数都以nameservice ID和NameNode ID作为后缀。

在设置了所有必要的配置选项之后,必须在将要运行JournalNode守护进程的计算机集中启动它们。 这可以通过运行“hdfs --daemon start journalnode”命令,并在每台相关机器上等待守护进程启动来完成。

启动JournalNodes之后,必须首先同步两个HA namenode的磁盘上元数据。

如果你正在设置一个新的HDFS集群,你应该首先在一个namenode上运行format命令(HDFS namenode -format)。

如果你已经格式化过NameNode,或者正在将一个未启用HA的集群转换为启用HA的集群,你现在应该在未格式化的NameNode上运行“hdfs namenode -bootstrapStandby”命令,将已格式化NameNode的元数据目录内容复制到这个未格式化的NameNode。 运行此命令还将确保JournalNodes(由dfs.namenode.shared.edits.dir配置)包含足够的编辑事务,从而能够启动两个NameNode。

如果你要将一个非HA的NameNode转换为HA,你应该运行" hdfs NameNode -initializeSharedEdits "命令,它将使用来自本地NameNode编辑目录的编辑数据初始化JournalNodes。

在这一点上,你可以启动所有的HA NameNode,就像你通常启动一个NameNode一样。

你可以通过浏览它们配置的HTTP地址来分别访问每个namenode的网页。 您应该注意到,在配置的地址旁边是NameNode的HA状态(“standby”或“active”)。 HA NameNode启动时,初始状态为Standby。

如果要进行HA升级,操作人员需要进行以下操作:
1、正常关闭所有nn,并安装更新的软件。
2、启动所有的jn。 请注意,在执行升级、回滚或结束操作时,所有的jn都要运行,这一点非常重要。 如果在运行任何这些操作时任何jn关闭,则该操作将失败。
3、以’-upgrade’标志启动一个nn。
4、在启动时,该NN将不会像往常一样在HA设置中进入备用状态。 相反,该NN将立即进入活动状态,对其本地存储dirs执行升级,并对共享编辑日志执行升级。此时,HA对中的另一个NN将与升级后的NN不同步。 为了使它恢复同步并再次具有高可用性设置,你应该通过运行带有“-bootstrapStandby”标志的NN来重新引导这个NameNode。 使用“-upgrade”标志启动第二个NN是错误的。
请注意,在任何时候,如果你想在完成或回滚升级之前重启namenode,你应该正常启动nn,即没有任何特殊的启动标志。
要查询升级状态,操作员需要在至少一个nn运行时使用’ hdfs dfsadmin -upgrade query’命令。 对于每个NN,无论升级过程是否完成,该命令都将返回。
要完成HA升级,当nn运行且其中一个处于激活状态时,操作员将使用’ hdfs dfsadmin -finalizeUpgrade’命令。 当发生这种情况时,活跃的NN将执行共享日志的终结,而其本地存储目录包含以前的FS状态的NN将删除其本地状态。
要执行升级回滚,应该首先关闭两个nn。 操作员应该在他们发起升级过程的NN上运行回滚命令,这将对本地dirs以及共享日志(NFS或jn)执行回滚。 然后,这个NN应该启动,操作员应该在另一个NN上运行’ -bootstrapStandby’,使两个NN与这个回滚的文件系统状态同步。

开始搭建

停止所有进程

[hdfs@d-01 ~]$ stop-dfs.sh 
Stopping namenodes on [d-01]
Stopping datanodes
Stopping secondary namenodes [d-01]
[yarn@d-01 ~]$ stop-yarn.sh 
Stopping nodemanagers
d-03: WARNING: nodemanager did not stop gracefully after 5 seconds: Trying to kill with kill -9
d-02: WARNING: nodemanager did not stop gracefully after 5 seconds: Trying to kill with kill -9
Stopping resourcemanager
[root@d-01 ~]# su - mapred
Last login: Tue Apr 26 13:38:10 CST 2022 on pts/0
[mapred@d-01 ~]$ mapred --daemon stop historyserver

去d-02和d-03上分别操作

[root@d-02 ~]# jps
3943 Jps
3067 -- process information unavailable
[root@d-02 ~]# ll /tmp/
total 0
drwxrwxr-x 3 hdfs hdfs 17 Apr 26 13:07 hadoop-hdfs
drwxr-xr-x 3 yarn yarn 26 Apr 26 13:27 hadoop-yarn
drwxr-xr-x 2 hdfs hdfs  6 Apr 26 16:44 hsperfdata_hdfs
drwxr-xr-x 2 root root  6 Apr 26 16:46 hsperfdata_root
drwxr-xr-x 2 yarn yarn 18 Apr 26 13:28 hsperfdata_yarn
drwx------ 2 root root  6 Apr 26 16:42 vmware-root
[root@d-02 ~]# ll /tmp/hsperfdata_yarn/3067 
-rw------- 1 yarn yarn 32768 Apr 26 16:45 /tmp/hsperfdata_yarn/3067
[root@d-02 ~]# rm -rf /tmp/hsperfdata_yarn/3067 
[root@d-02 ~]# jps
3958 Jps
[root@d-03 ~]# jps
2739 -- process information unavailable
3559 Jps
[root@d-03 ~]# ll /tmp/
total 0
drwxrwxr-x 3 hdfs hdfs 17 Apr 26 13:07 hadoop-hdfs
drwxr-xr-x 3 yarn yarn 26 Apr 26 13:27 hadoop-yarn
drwxr-xr-x 2 hdfs hdfs  6 Apr 26 16:44 hsperfdata_hdfs
drwxr-xr-x 2 root root  6 Apr 26 16:47 hsperfdata_root
drwxr-xr-x 2 yarn yarn 18 Apr 26 13:29 hsperfdata_yarn
drwx------ 3 yarn yarn 20 Apr 26 13:29 jetty-0_0_0_0-8042-hadoop-yarn-common-3_3_2_jar-_-any-5217348324521214543
drwx------ 3 root root 17 Apr 26 10:54 systemd-private-9306b25b7b61420e8237dcf5021a8eec-chronyd.service-n6MkUz
drwx------ 2 root root  6 Apr 26 16:42 vmware-root
[root@d-03 ~]# rm -rf /tmp/hsperfdata_yarn/2739 
[root@d-03 ~]# jps
3573 Jps

改配置

只在d-01操作
core-site.xml文件将fs.defaultFS的值由hdfs://d-01:9000改为hdfs://mycluster

   <property>
        <name>fs.defaultFS</name>
        <value>hdfs://mycluster</value>
        <!--value>hdfs://d-01:9000</value-->
    </property>

hdfs-site.xml文件增加如下配置

<!--配置namenode高可用-->
<property>
    <name>dfs.nameservices</name>
    <value>mycluster</value>
</property>
<property>
    <name>dfs.ha.namenodes.mycluster</name>
    <value>nn1,nn2,nn3</value>
</property>
<property>
    <name>dfs.namenode.rpc-address.mycluster.nn1</name>
    <value>d-01:8020</value>
</property>
<property>
    <name>dfs.namenode.rpc-address.mycluster.nn2</name>
    <value>d-02:8020</value>
</property>
<property>
    <name>dfs.namenode.rpc-address.mycluster.nn3</name>
    <value>d-03:8020</value>
</property>
<property>
    <name>dfs.namenode.http-address.mycluster.nn1</name>
    <value>d-01:9870</value>
</property>
<property>
    <name>dfs.namenode.http-address.mycluster.nn2</name>
    <value>d-02:9870</value>
</property>
<property>
    <name>dfs.namenode.http-address.mycluster.nn3</name>
    <value>d-03:9870</value>
</property>
<property>
    <name>dfs.namenode.shared.edits.dir</name>
    <value>qjournal://d-01:8485;d-02:8485;d-03:8485/mycluster</value>
</property>
<property>
    <name>dfs.client.failover.proxy.provider.mycluster</name>
    <value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
    <name>dfs.ha.fencing.methods</name>
    <value>sshfence</value>
</property>
<property>
    <name>dfs.ha.fencing.ssh.connect-timeout</name>
    <value>30000</value>
</property>
<property>
    <name>dfs.ha.fencing.ssh.private-key-files</name>
    <value>/home/hdfs/.ssh/id_rsa</value>
</property>
<property>
    <name>dfs.journalnode.edits.dir</name>
    <value>/opt/hadoop-3.3.2/journal/data</value>
</property>

从d-01复制配置到其他节点

[root@d-01 ~]# scp /opt/hadoop-3.3.2/etc/hadoop/core-site.xml d-02:/opt/hadoop-3.3.2/etc/hadoop/

[root@d-01 ~]# scp /opt/hadoop-3.3.2/etc/hadoop/core-site.xml d-03:/opt/hadoop-3.3.2/etc/hadoop/

[root@d-01 ~]# scp /opt/hadoop-3.3.2/etc/hadoop/hdfs-site.xml d-02:/opt/hadoop-3.3.2/etc/hadoop/

[root@d-01 ~]# scp /opt/hadoop-3.3.2/etc/hadoop/hdfs-site.xml d-03:/opt/hadoop-3.3.2/etc/hadoop/

启动journalnode进程

每个节点都执行

[hdfs@d-01 ~]$ hdfs --daemon start journalnode

在每个节点的目录下建立

[hdfs@d-01 ~]$ mkdir /opt/hadoop-3.3.2/journal/data/mycluster
[hdfs@d-02 ~]$ mkdir /opt/hadoop-3.3.2/journal/data/mycluster
[hdfs@d-03 ~]$ mkdir /opt/hadoop-3.3.2/journal/data/mycluster

如果不建立该目录,后续操作时journalnode日志会报如下错误。

2022-04-26 17:55:07,004 WARN org.apache.hadoop.hdfs.server.common.Storage: Storage directory /opt/hadoop-3.3.2/journal/data/mycluster does not exist
2022-04-26 17:55:07,400 INFO org.apache.hadoop.hdfs.qjournal.server.JournalNodeSyncer: Starting SyncJournal daemon for journal mycluster
2022-04-26 17:55:07,417 INFO org.apache.hadoop.ipc.Server: IPC Server handler 3 on default port 8485, call Call#1 Retry#0 org.apache.hadoop.hdfs.qjournal.protocol.QJournalProtocol.getEditLogManifest from 192.168.103.232:38956
org.apache.hadoop.hdfs.qjournal.protocol.JournalNotFormattedException: Journal Storage Directory root= /opt/hadoop-3.3.2/journal/data/mycluster; location= null not formatted ; journal id: mycluster
	at org.apache.hadoop.hdfs.qjournal.server.Journal.checkFormatted(Journal.java:531)
	at org.apache.hadoop.hdfs.qjournal.server.Journal.getEditLogManifest(Journal.java:721)
	at org.apache.hadoop.hdfs.qjournal.server.JournalNodeRpcServer.getEditLogManifest(JournalNodeRpcServer.java:229)

初始化journalnode

[hdfs@d-01 ~]$ hdfs namenode -initializeSharedEdits

同步namenode数据
启动格式化过的namenode,这里是d-01。

[hdfs@d-01 ~]$ hdfs --daemon start namenode

警告:同步数据前必须启动做过格式化的那个namenode节点,否则执行hdfs namenode -bootstrapStandby命令会报如下错误。

2022-04-26 17:53:33,565 INFO ha.BootstrapStandby: Found nn: nn1, ipc: d-01/192.168.103.232:8020
2022-04-26 17:53:33,565 INFO ha.BootstrapStandby: Found nn: nn3, ipc: d-03/192.168.103.237:8020
2022-04-26 17:53:35,618 INFO ipc.Client: Retrying connect to server: d-01/192.168.103.232:8020. Already tried 0 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)
2022-04-26 17:53:36,620 INFO ipc.Client: Retrying connect to server: d-01/192.168.103.232:8020. Already tried 1 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=10, sleepTime=1000 MILLISECONDS)

警告:只在没有格式化的namenode节点执行,因为d-01节点已经格式化了,所以不要在d-01上执行。

[hdfs@d-02 ~]$ hdfs namenode -bootstrapStandby
[hdfs@d-03 ~]$ hdfs namenode -bootstrapStandby

警告:执行命令节点必须做主机名解析,否则执行hdfs namenode -bootstrapStandby命令会报如下错误。

2022-04-26 16:59:40,358 WARN hdfs.DFSUtilClient: Namenode for mycluster remains unresolved for ID nn1. Check your hdfs-site.xml file to ensure namenodes are configured properly.
2022-04-26 16:59:40,622 WARN hdfs.DFSUtilClient: Namenode for mycluster remains unresolved for ID nn3. Check your hdfs-site.xml file to ensure namenodes are configured properly.

启动d-02和d-03的namenode

[hdfs@d-02 ~]$ hdfs --daemon start namenode
[hdfs@d-03 ~]$ hdfs --daemon start namenode
[hdfs@d-01 ~]$ hdfs haadmin -getAllServiceState
d-01:8020                                          standby   
d-02:8020                                          standby   
d-03:8020                                          standby   
[hdfs@d-01 ~]$ hdfs haadmin -transitionToActive nn3
[hdfs@d-01 ~]$ hdfs haadmin -getAllServiceState
d-01:8020                                          standby   
d-02:8020                                          standby   
d-03:8020                                          active    

你可以通过浏览它们配置的HTTP地址来分别访问每个namenode的网页。 您应该注意到,在配置的地址旁边是NameNode的HA状态(“standby”或“active”)。 HA NameNode启动时,初始状态为Standby。

启动其他进程

[hdfs@d-01 ~]$ hdfs --daemon start datanode
[hdfs@d-02 ~]$ hdfs --daemon start datanode
[hdfs@d-03 ~]$ hdfs --daemon start datanode
[yarn@d-01 ~]$ start-yarn.sh 
[mapred@d-01 ~]$ mapred --daemon start historyserver

单点变手动集群成功。

搭建自动切换高可用集群

搭建zookeeper

准备安装包

下载安装包
d-01操作

wget https://dlcdn.apache.org/zookeeper/zookeeper-3.6.3/apache-zookeeper-3.6.3-bin.tar.gz

解压安装包
d-01操作

[root@d-01 opt]# tar zxvf apache-zookeeper-3.6.3-bin.tar.gz -C /opt/

配置zookeeper集群

d-01操作

[root@d-01 ~]# cd /opt/apache-zookeeper-3.6.3-bin/conf/
[root@d-01 conf]# cp zoo_sample.cfg zoo.cfg

配置如下

[root@d-01 conf]# egrep -v "^#|^$" zoo.cfg 
tickTime=2000
initLimit=5
syncLimit=2
dataDir=/var/lib/zookeeper
clientPort=2181
server.1=d-01:2888:3888
server.2=d-02:2888:3888
server.3=d-03:2888:3888

将zookeeper目录整个复制到d-02和d-03节点
d-01操作

[root@d-01 ~]# scp -rp /opt/apache-zookeeper-3.6.3-bin/ d-02:/opt/
[root@d-01 ~]# scp -rp /opt/apache-zookeeper-3.6.3-bin/ d-03:/opt/

在每个节点创建zookeeper数据目录,并在该目录下创建myid文件
每个节点都操作

[root@d-01 ~]# mkdir  /var/lib/zookeeper
[root@d-02 ~]# mkdir  /var/lib/zookeeper
[root@d-03 ~]# mkdir  /var/lib/zookeeper

[root@d-01 ~]# echo "1" > /var/lib/zookeeper/myid
[root@d-02 ~]# echo "2" > /var/lib/zookeeper/myid
[root@d-03 ~]# echo "3" > /var/lib/zookeeper/myid

启动zookeeper集群

[root@d-01 ~]# /opt/apache-zookeeper-3.6.3-bin/bin/zkServer.sh start
[root@d-02 ~]# /opt/apache-zookeeper-3.6.3-bin/bin/zkServer.sh start
[root@d-03 ~]# /opt/apache-zookeeper-3.6.3-bin/bin/zkServer.sh start

查看集群状态

[root@d-01 ~]# /opt/apache-zookeeper-3.6.3-bin/bin/zkServer.sh status
/usr/bin/java
ZooKeeper JMX enabled by default
Using config: /opt/apache-zookeeper-3.6.3-bin/bin/../conf/zoo.cfg
Client port found: 2181. Client address: localhost. Client SSL: false.
Mode: follower

[root@d-02 ~]# /opt/apache-zookeeper-3.6.3-bin/bin/zkServer.sh status
/usr/bin/java
ZooKeeper JMX enabled by default
Using config: /opt/apache-zookeeper-3.6.3-bin/bin/../conf/zoo.cfg
Client port found: 2181. Client address: localhost. Client SSL: false.
Mode: leader

[root@d-03 ~]# /opt/apache-zookeeper-3.6.3-bin/bin/zkServer.sh status
/usr/bin/java
ZooKeeper JMX enabled by default
Using config: /opt/apache-zookeeper-3.6.3-bin/bin/../conf/zoo.cfg
Client port found: 2181. Client address: localhost. Client SSL: false.
Mode: follower

切换到自动故障转移集群

说明:基于手动故障转移集群操作。

停止所有运行的服务

在d-01操作
停止mapred服务

[root@d-01 ~]# su - mapred
Last login: Tue Apr 26 18:11:39 CST 2022 on pts/1
[mapred@d-01 ~]$ mapred --daemon stop historyserver

在d-01操作
停止yarn服务

[mapred@d-01 ~]$ su - yarn
Password: 
Last login: Tue Apr 26 18:11:20 CST 2022 on pts/1
[yarn@d-01 ~]$ stop-yarn.sh 
Stopping nodemanagers
Stopping resourcemanager

在d-01操作
停止hdfs服务

[yarn@d-01 ~]$ su - hdfs
Password: 
Last login: Tue Apr 26 17:50:41 CST 2022 on pts/1
[hdfs@d-01 ~]$ stop-dfs.sh 
Stopping namenodes on [d-01 d-02 d-03]
Stopping datanodes
Stopping journal nodes [d-03 d-01 d-02]

确认所有服务都停止
在每个节点都操作
切换到root用户

[root@d-01 ~]# jps
20322 QuorumPeerMain
21484 Jps
[root@d-02 ~]# jps
13890 Jps
13497 QuorumPeerMain
[root@d-03 ~]# jps
11403 QuorumPeerMain
11759 Jps

注:QuorumPeerMain为zookeeper进程。

配置hadoop集群

警告:在开始配置自动故障转移之前,应该关闭集群。 在集群运行时,目前不可能从手动故障转移设置过渡到自动故障转移设置。

d-01操作
hdfs-site.xml文件增加如下配置

<!--namenode高可用集群自动切换配置-->
<property>
    <name>dfs.ha.automatic-failover.enabled</name>
    <value>true</value>
</property>

core-site.xml文件增加如下配置

<!--namenode高可用集群自动切换配置-->
<property>
   <name>ha.zookeeper.quorum</name>
   <value>d-01:2181,d-02:2181,d-03:2181</value>
</property>

复制配置到其他主机

[root@d-01 ~]# scp -rp /opt/hadoop-3.3.2/etc/hadoop/core-site.xml d-02:/opt/hadoop-3.3.2/etc/hadoop/core-site.xml                                                                                                                                 
[root@d-01 ~]# scp -rp /opt/hadoop-3.3.2/etc/hadoop/core-site.xml d-03:/opt/hadoop-3.3.2/etc/hadoop/core-site.xml 
[root@d-01 ~]# scp -rp /opt/hadoop-3.3.2/etc/hadoop/hdfs-site.xml d-02:/opt/hadoop-3.3.2/etc/hadoop/hdfs-site.xml 
[root@d-01 ~]# scp -rp /opt/hadoop-3.3.2/etc/hadoop/hdfs-site.xml d-03:/opt/hadoop-3.3.2/etc/hadoop/hdfs-site.xml 

必要操作

安装fuser命令
在每个节点执行

[root@d-01 ~]# yum install -y psmisc
[root@d-02 ~]# yum install -y psmisc
[root@d-03 ~]# yum install -y psmisc

hdfs用户免密登录。
d-01操作

[hdfs@d-01 ~]$ ssh-keygen 
[hdfs@d-01 ~]$ ssh-copy-id d-01
[hdfs@d-01 ~]$ ssh-copy-id d-02
[hdfs@d-01 ~]$ ssh-copy-id d-03

d-02操作

[hdfs@d-02 ~]$ ssh-keygen 
[hdfs@d-02 ~]$ ssh-copy-id d-01
[hdfs@d-02 ~]$ ssh-copy-id d-02
[hdfs@d-02 ~]$ ssh-copy-id d-03

d-03操作

[hdfs@d-03 ~]$ ssh-keygen 
[hdfs@d-03 ~]$ ssh-copy-id d-01
[hdfs@d-03 ~]$ ssh-copy-id d-02
[hdfs@d-03 ~]$ ssh-copy-id d-03

在zookeeper中初始化ha状态

从任意一台主机运行即可

[hdfs@d-01 ~]$ hdfs zkfc -formatZK

这将在ZooKeeper中创建一个znode,自动故障转移系统将在其中存储数据。
如果报错

FATAL ha.ZKFailoverController: Automatic failover is not enabled for NameNod
e at node1/192.168.103.232:8020. Please ensure that automatic failover is enabled in the configuration before running the ZK failover controller.

在hdfs-site中添加如下配置
所有节点都配置

<property> 
<name>dfs.ha.automatic-failover.enabled.mycluster</name> 
<value>true</value> 
</property> 

启动hdfs

由于在配置中启用了自动故障转移,start-dfs.sh脚本现在将在任何运行NameNode的机器上自动启动一个ZKFC守护进程。 当zkfc启动时,它们会自动选择一个namenode激活。
d-01节点执行

[hdfs@d-01 ~]$ start-dfs.sh 

输出

Starting namenodes on [d-01 d-02 d-03]
Starting datanodes
Starting journal nodes [d-03 d-01 d-02]
Starting ZK Failover Controllers on NN hosts [d-01 d-02 d-03]

查看namenode

[hdfs@d-01 ~]$ hdfs haadmin -getAllServiceState
d-01:8020                                          active    
d-02:8020                                          standby   
d-03:8020                                          standby   

从输出看出d-01被选为active的namenode。
手动启动zkfc集群命令为

hdfs --daemon start zkfc

启动其他服务

[yarn@d-01 ~]$ start-yarn.sh 
Starting resourcemanager
Starting nodemanagers
[yarn@d-01 ~]$ su - mapred
[mapred@d-01 ~]$ mapred --daemon start historyserver

验证故障自动转移

查看namenode状态
任意节点执行即可

[hdfs@d-01 ~]$ hdfs haadmin -getAllServiceState
d-01:8020                                          standby   
d-02:8020                                          standby   
d-03:8020                                          active   

登录d-03停止其namenode

[hdfs@d-03 ~]$ hdfs --daemon stop namenode

任意节点查看是否自动切换状态

[hdfs@d-01 ~]$ hdfs haadmin -getAllServiceState
d-01:8020                                          standby   
d-02:8020                                          active    
2022-04-27 11:44:02,314 INFO ipc.Client: Retrying connect to server: d-03/192.168.103.237:8020. Already tried 0 time(s); retry policy is RetryUpToMaximumCountWithFixedSleep(maxRetries=1, sleepTime=1000 MILLISECONDS)
d-03:8020                                          Failed to connect: Call From d-01/192.168.103.232 to d-03:8020 failed on connection exception: java.net.ConnectException: Connection refused; For more details see:  http://wiki.apache.org/hadoop/ConnectionRefused

从输出可以看出自动切换成功。

切换失败原因

原因1

zkfc日志输出

2022-04-27 11:35:49,910 WARN org.apache.hadoop.ha.SshFenceByTcpPort: PATH=$PATH:/sbin:/usr/sbin fuser -v -k -n tcp 8020 via ssh: bash: fuser: command not found
2022-04-27 11:35:49,910 INFO org.apache.hadoop.ha.SshFenceByTcpPort: rc: 127
2022-04-27 11:35:49,910 INFO org.apache.hadoop.ha.SshFenceByTcpPort.jsch: Disconnecting from d-02 port 22
2022-04-27 11:35:49,910 WARN org.apache.hadoop.ha.NodeFencer: Fencing method org.apache.hadoop.ha.SshFenceByTcpPort(null) was unsuccessful.
2022-04-27 11:35:49,910 ERROR org.apache.hadoop.ha.NodeFencer: Unable to fence service by any configured method.
2022-04-27 11:35:49,910 WARN org.apache.hadoop.ha.ActiveStandbyElector: Exception handling the winning of election
java.lang.RuntimeException: Unable to fence NameNode at d-02/192.168.103.235:8020
	at org.apache.hadoop.ha.ZKFailoverController.doFence(ZKFailoverController.java:558)
	at org.apache.hadoop.ha.ZKFailoverController.fenceOldActive(ZKFailoverController.java:530)
	at org.apache.hadoop.ha.ZKFailoverController.access$1100(ZKFailoverController.java:63)
	at org.apache.hadoop.ha.ZKFailoverController$ElectorCallbacks.fenceOldActive(ZKFailoverController.java:966)
	at org.apache.hadoop.ha.ActiveStandbyElector.fenceOldActive(ActiveStandbyElector.java:997)
	at org.apache.hadoop.ha.ActiveStandbyElector.becomeActive(ActiveStandbyElector.java:896)
	at org.apache.hadoop.ha.ActiveStandbyElector.processResult(ActiveStandbyElector.java:476)
	at org.apache.zookeeper.ClientCnxn$EventThread.processEvent(ClientCnxn.java:636)
	at org.apache.zookeeper.ClientCnxn$EventThread.run(ClientCnxn.java:510)

从报错可以看出fuser命令未找到,安装psmisc包即可。
在每个节点执行

[root@d-01 ~]# yum install -y psmisc
[root@d-02 ~]# yum install -y psmisc
[root@d-03 ~]# yum install -y psmisc
原因2
2022-04-27 11:49:55,385 WARN org.apache.hadoop.ha.HealthMonitor: Transport-level exception trying to monitor health of NameNode at d-01/192.168.103.232:8020
java.net.ConnectException: Call From d-01/192.168.103.232 to d-01:8020 failed on connection exception: java.net.ConnectException: Connection refused; For more details see:  http://wiki.apache.org/hadoop/ConnectionRefused
	at sun.reflect.GeneratedConstructorAccessor25.newInstance(Unknown Source)
	at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
	at java.lang.reflect.Constructor.newInstance(Constructor.java:423)
	at org.apache.hadoop.net.NetUtils.wrapWithMessage(NetUtils.java:913)
	at org.apache.hadoop.net.NetUtils.wrapException(NetUtils.java:828)
	at org.apache.hadoop.ipc.Client.getRpcResponse(Client.java:1616)
	at org.apache.hadoop.ipc.Client.call(Client.java:1558)
	at org.apache.hadoop.ipc.Client.call(Client.java:1455)
	at org.apache.hadoop.ipc.ProtobufRpcEngine2$Invoker.invoke(ProtobufRpcEngine2.java:242)
	at org.apache.hadoop.ipc.ProtobufRpcEngine2$Invoker.invoke(ProtobufRpcEngine2.java:129)
	at com.sun.proxy.$Proxy9.getServiceStatus(Unknown Source)
	at org.apache.hadoop.ha.protocolPB.HAServiceProtocolClientSideTranslatorPB.getServiceStatus(HAServiceProtocolClientSideTranslatorPB.java:136)
	at org.apache.hadoop.ha.HealthMonitor.doHealthChecks(HealthMonitor.java:197)
	at org.apache.hadoop.ha.HealthMonitor.access$600(HealthMonitor.java:49)
	at org.apache.hadoop.ha.HealthMonitor$MonitorDaemon.run(HealthMonitor.java:287)
Caused by: java.net.ConnectException: Connection refused
	at sun.nio.ch.SocketChannelImpl.checkConnect(Native Method)
	at sun.nio.ch.SocketChannelImpl.finishConnect(SocketChannelImpl.java:716)
	at org.apache.hadoop.net.SocketIOWithTimeout.connect(SocketIOWithTimeout.java:205)
	at org.apache.hadoop.net.NetUtils.connect(NetUtils.java:586)
	at org.apache.hadoop.ipc.Client$Connection.setupConnection(Client.java:711)
	at org.apache.hadoop.ipc.Client$Connection.setupIOstreams(Client.java:833)
	at org.apache.hadoop.ipc.Client$Connection.access$3800(Client.java:414)
	at org.apache.hadoop.ipc.Client.getConnection(Client.java:1677)
	at org.apache.hadoop.ipc.Client.call(Client.java:1502)

原因:hdfs用户没做免密登录到其他节点。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

时空无限

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值