Hadoop 3.1.0 高可用(HA)集群配置
机器配置
主机名称 | IP | 属性 |
ms3 | 192.168.242.3 | namenode |
ms4 | 192.168.242.4 | namenode |
ms5 | 192.168.242.5 | datanode |
ms6 | 192.168.242.6 | datanode |
ms7 | 192.168.242.7 | datanode |
配置静态IP
修改
/etc/sysconfig/network-scripts/ifcfg-ens33
修改内容
TYPE="Ethernet"
PROXY_METHOD="none"
BROWSER_ONLY="no"
#BOOTPROTO="dhcp"
BOOTPROTO="static"
DEFROUTE="yes"
IPV4_FAILURE_FATAL="no"
IPV6INIT="yes"
IPV6_AUTOCONF="yes"
IPV6_DEFROUTE="yes"
IPV6_FAILURE_FATAL="no"
IPV6_ADDR_GEN_MODE="stable-privacy"
NAME="ens33"
UUID="818b75d3-c01a-472e-930b-7bf8777ef7a1"
DEVICE="ens33"
ONBOOT="yes"
# 修改每一台机器的IP
IPADDR=192.168.242.3
GATEWAY=192.168.242.2
NETMASK=255.255.255.0
NM_CONTROLLED="no"
DNS1=114.114.114.114
DNS2=8.8.8.8
重新启动网络服务
service network restart
完整的hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.242.1 ms
192.168.242.3 ms3
192.168.242.4 ms4
192.168.242.5 ms5
192.168.242.6 ms6
192.168.242.7 ms7
192.168.242.8 ms8
关闭防火墙
#临时关闭
systemctl stop firewalld
#禁止开机启动
systemctl disable firewalld
# 查看防火墙状态
firewall-cmd --state
# 重新启动机器
reboot
配置SSH
主节点
ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa
cat ~/.ssh/id_rsa.pub >> ~/.ssh/authorized_keys
chmod 0600 ~/.ssh/authorized_keys
拷贝到其他节点
scp /root/.ssh/id_rsa.pub root@子节点主机或者IP:~
其他节点操作
mkdir -p ~/.ssh
cd ~/.ssh/
cat ~/id_rsa.pub >> authorized_keys
vim /etc/ssh/sshd_config
# 如果是用root用户登录请开启
PermitRootLogin yes
# 重新启动ssh
service sshd restart
配置JAVA
解压
添加环境变量
export JAVA_HOME=/usr/local/ms/jdk1.8.0_181
export PATH=$PATH:$JAVA_HOME/bin
配置ZooKeeper
只需要在ms5,ms6,ms7 配置
解压
修改配置文件
| 路径
/usr/local/ms/zookeeper-3.4.13/conf
| shell 命令
cp zoo_sample.cfg zoo.cfg
# 只需要修改dataDir
dataDir=/opt/zookeeper/data
# 文件末尾添加
server.1=ms5:2888:3888
server.2=ms6:2888:3888
server.3=ms7:2888:3888
添加环境变量
export ZOOKEEPER_HOME=/usr/local/ms/zookeeper-3.4.13
export PATH=$PATH:$ZOOKEEPER_HOME/bin:$ZOOKEEPER_HOME/conf
创建文件夹
mkdir -p /opt/zookeeper/data
写入文件
在ms5,ms6,ms7中分别执行
# Write each server's unique id into the myid file that ZooKeeper reads at
# startup. The id must match the "server.N=" entry in zoo.cfg for that host.
# NOTE: the ">" redirection is required — without it, echo merely prints the
# path to stdout and no myid file is created, so the quorum never forms.
#ms5
echo 1 > /opt/zookeeper/data/myid
#ms6
echo 2 > /opt/zookeeper/data/myid
#ms7
echo 3 > /opt/zookeeper/data/myid
配置Hadoop
解压
修改配置文件
修改 hadoop-env.sh
export JAVA_HOME=/usr/local/ms/jdk1.8.0_181
export HADOOP_HOME=/usr/local/ms/hadoop-3.1.0
hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!--指定hdfs的nameservice为ns1,需要和core-site.xml中的保持一致 -->
<property>
<name>dfs.nameservices</name>
<value>ns1</value>
</property>
<!-- ns1下面有两个NameNode,分别是nn1,nn2 -->
<property>
<name>dfs.ha.namenodes.ns1</name>
<value>nn1,nn2</value>
</property>
<!-- nn1的RPC通信地址 -->
<property>
<name>dfs.namenode.rpc-address.ns1.nn1</name>
<value>ms3:9000</value>
</property>
<!-- nn1的http通信地址 -->
<property>
<name>dfs.namenode.http-address.ns1.nn1</name>
<value>ms3:50070</value>
</property>
<!-- nn2的RPC通信地址 -->
<property>
<name>dfs.namenode.rpc-address.ns1.nn2</name>
<value>ms4:9000</value>
</property>
<!-- nn2的http通信地址 -->
<property>
<name>dfs.namenode.http-address.ns1.nn2</name>
<value>ms4:50070</value>
</property>
<!-- 指定NameNode的元数据在JournalNode上的存放位置 -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://ms5:8485;ms6:8485;ms7:8485/ns1</value>
</property>
<!-- 指定JournalNode在本地磁盘存放数据的位置 -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/opt/hadoop/journaldata</value>
</property>
<!-- 开启NameNode失败自动切换 -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- 配置失败自动切换实现方式 -->
<property>
<name>dfs.client.failover.proxy.provider.ns1</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- 配置隔离机制方法,多个机制用换行分割,即每个机制暂用一行-->
<property>
<name>dfs.ha.fencing.methods</name>
<value>
sshfence
shell(/bin/true)
</value>
</property>
<!-- 使用sshfence隔离机制时需要ssh免登陆 -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/root/.ssh/id_rsa</value>
</property>
<!-- 配置sshfence隔离机制超时时间 -->
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
<property>
<name>dfs.permissions</name>
<value>false</value>
</property>
</configuration>
core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!-- 指定hdfs的nameservice为ns1 -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://ns1/</value>
</property>
<!-- 指定hadoop临时目录 -->
<property>
<name>hadoop.tmp.dir</name>
<value>/opt/hadoop/tmp</value>
</property>
<!-- 指定zookeeper地址 -->
<property>
<name>ha.zookeeper.quorum</name>
<value>ms5:2181,ms6:2181,ms7:2181</value>
</property>
</configuration>
yarn-site.xml
<?xml version="1.0"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<configuration>
<!-- 开启RM高可用 -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- 指定RM的cluster id -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>yrc</value>
</property>
<!-- 指定RM的名字 -->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!-- 分别指定RM的地址 -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>ms3</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>ms4</value>
</property>
<!-- 指定zk集群地址 -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>ms5:2181,ms6:2181,ms7:2181</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
mapred-site.xml
<?xml version="1.0"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<!--
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. See accompanying LICENSE file.
-->
<!-- Put site-specific property overrides in this file. -->
<configuration>
<!-- 指定mr框架为yarn方式 -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.application.classpath</name>
<value>
/usr/local/ms/hadoop-3.1.0/etc/hadoop,
/usr/local/ms/hadoop-3.1.0/share/hadoop/common/*,
/usr/local/ms/hadoop-3.1.0/share/hadoop/common/lib/*,
/usr/local/ms/hadoop-3.1.0/share/hadoop/hdfs/*,
/usr/local/ms/hadoop-3.1.0/share/hadoop/hdfs/lib/*,
/usr/local/ms/hadoop-3.1.0/share/hadoop/mapreduce/*,
/usr/local/ms/hadoop-3.1.0/share/hadoop/mapreduce/lib/*,
/usr/local/ms/hadoop-3.1.0/share/hadoop/yarn/*,
/usr/local/ms/hadoop-3.1.0/share/hadoop/yarn/lib/*
</value>
</property>
</configuration>
修改workers文件 添加主机名称
ms5
ms6
ms7
修改 /usr/local/ms/hadoop-3.1.0/sbin
在start-dfs.sh 和 stop-dfs.sh 中添加
HDFS_NAMENODE_USER=root
HDFS_DATANODE_USER=root
HDFS_JOURNALNODE_USER=root
HDFS_ZKFC_USER=root
在start-yarn.sh 和 stop-yarn.sh 中添加
YARN_RESOURCEMANAGER_USER=root
YARN_NODEMANAGER_USER=root
拷贝到其他节点
scp -r hadoop-3.1.0 主机名:/usr/local/ms/
创建文件夹
rm -rf /opt/hadoop/*
mkdir -p /opt/hadoop/journaldata
mkdir -p /opt/hadoop/tmp
启动
在ms5,ms6,ms7 中启动
zkServer.sh start
查看zookeeper启动状态
zkServer.sh status
在ms5,ms6,ms7 中启动
hadoop-daemon.sh start journalnode
运行jps命令查看是否有 JournalNode 进程
ms3格式化HDFS
hdfs namenode -format
ms4运行
hdfs namenode -bootstrapStandby
ms3(任一 NameNode)上格式化 ZKFC
hdfs zkfc -formatZK
启动HDFS
start-dfs.sh
启动YARN
start-yarn.sh
访问UI
效果
测试集群工作状态的指令
# 查看hdfs的各节点状态信息
hdfs dfsadmin -report
# 获取一个namenode节点的HA状态
hdfs haadmin -getServiceState nn1
# 单独启动一个namenode进程
hadoop-daemon.sh start namenode
# 单独启动一个zkfc进程
hadoop-daemon.sh start zkfc