HA configuration process
-----------------
0. Node selection
Pick the designated namenodes:
s101
s105
1. Configure s101 and s105 so that each can ssh to the other without a password, for example:
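A minimal sketch, assuming the centos user on both hosts and key-based login allowed by sshd (run on s101, then repeat in the other direction on s105):
$>ssh-keygen -t rsa -P '' -f ~/.ssh/id_rsa      # generate a key pair if one does not already exist
$>ssh-copy-id centos@s105                       # install the public key on the peer node
$>ssh s105 hostname                             # verify passwordless login works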
2. Configuration files
[=============core-site.xml============]
<?xml version="1.0"?>
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/home/centos/hadoop/ha/journalnode</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/centos/hadoop/ha</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>s102:2181,s103:2181,s104:2181</value>
</property>
<property>
<name>fs.trash.interval</name>
<value>0</value>
<description>
Number of minutes a deleted file is retained in the trash. Default 0, which disables the trash.
</description>
</property>
<property>
<name>fs.trash.checkpoint.interval</name>
<value>0</value>
<description>Interval in minutes between trash checkpoints (default 0 minutes); 0 means use the same value as fs.trash.interval.</description>
</property>
<property>
<name>hadoop.http.staticuser.user</name>
<value>centos</value>
</property>
<!--
<property>
<name>net.topology.node.switch.mapping.impl</name>
<value>com.it18zhang.hadoop.rackaware.MyDNSToSwitchMapping</value>
</property>
-->
<property>
<name>hadoop.proxyuser.centos.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.centos.groups</name>
<value>*</value>
</property>
</configuration>
Original configuration file
<?xml version="1.0"?>
<!-- the value tags must use the local machine's own IP -->
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://mycluster</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/home/centos/ha/dfs/journal/node/local/data</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/home/centos/ha</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>s102:2181,s103:2181,s104:2181</value>
</property>
<property>
<name>hadoop.proxyuser.hue.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hue.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.httpfs.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.httpfs.groups</name>
<value>*</value>
</property>
</configuration>
[=============hdfs-site.xml============]
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>s101:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>s105:8020</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn1</name>
<value>s101:50070</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn2</name>
<value>s105:50070</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://s102:8485;s103:8485;s104:8485/mycluster</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>
sshfence
shell(/bin/true)
</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/centos/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>s105:50090</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>file://${hadoop.tmp.dir}/dfs/name1,file://${hadoop.tmp.dir}/dfs/name2</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file://${hadoop.tmp.dir}/dfs/data1,file://${hadoop.tmp.dir}/dfs/data2</value>
</property>
<property>
<name>dfs.namenode.fs-limits.min-block-size</name>
<value>512</value>
</property>
<property>
<name>dfs.hosts</name>
<value>/soft/hadoop/etc/hadoop/dfs_include.conf</value>
</property>
<property>
<name>dfs.hosts.exclude</name>
<value>/soft/hadoop/etc/hadoop/dfs_exclude.conf</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
</configuration>
Original configuration file
<?xml version="1.0"?>
<configuration>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.namenode.secondary.http-address</name>
<value>s105:50090</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/home/centos/ha/dfs/name1,/home/centos/ha/dfs/name2</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/home/centos/ha/dfs/data1,/home/centos/ha/dfs/data2</value>
</property>
<!-- HDFS high-availability configuration -->
<property>
<name>dfs.nameservices</name>
<value>mycluster</value>
</property>
<property>
<name>dfs.ha.namenodes.mycluster</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn1</name>
<value>s101:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.mycluster.nn2</name>
<value>s105:8020</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn1</name>
<value>s101:50070</value>
</property>
<property>
<name>dfs.namenode.http-address.mycluster.nn2</name>
<value>s105:50070</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://s102:8485;s103:8485;s104:8485/mycluster</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.mycluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>shell(/bin/true)</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
</configuration>
3. Distribute the configuration files to all nodes
xsync.sh core-site.xml
xsync.sh hdfs-site.xml
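xsync.sh is the custom distribution script used throughout these notes; its contents are not shown here, so the following is only a sketch of what such a script might look like, assuming rsync, passwordless SSH, and the host list s102~s105:
#!/bin/bash
# xsync.sh <file> : push a file to the same absolute path on every other node (hypothetical sketch)
file=$(readlink -f "$1")                        # absolute path of the file to distribute
dir=$(dirname "$file")
for host in s102 s103 s104 s105; do
  rsync -az "$file" centos@"$host":"$dir"/      # copy into the same directory on each node
done
After distributing, the HA settings can be spot-checked on any node:
$>hdfs getconf -confKey dfs.nameservices        # expect: mycluster
$>hdfs getconf -namenodes                       # expect: s101 s105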
Rebuilding the HA cluster
-----------------
1. Stop the Hadoop cluster
$>stop-dfs.sh
2. Delete all logs and local data files
$>xcall.sh rm -rf /soft/hadoop/logs/*
$>xcall.sh rm -rf /home/centos/hadoop/ha/*
3. Log in to the JN nodes and start the journalnode processes
[s102 ~ s104]
$>hadoop-daemon.sh start journalnode
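If preferred, the three journalnode processes can be started from a single shell, assuming passwordless SSH and HADOOP_HOME=/soft/hadoop (as implied by the config paths above):
$>for h in s102 s103 s104; do ssh $h /soft/hadoop/sbin/hadoop-daemon.sh start journalnode; done
$>xcall.sh jps                                  # s102~s104 should each show a JournalNode process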
4. Format the filesystem
[s101]
hdfs namenode -format
Cluster version on the namenode
Cluster version on the datanodes
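What matters here is that the clusterID in the VERSION files agrees between the namenode and the datanodes; with hadoop.tmp.dir=/home/centos/hadoop/ha as configured above, it can be checked once the daemons have been started:
$>cat /home/centos/hadoop/ha/dfs/name1/current/VERSION          # on s101: note the clusterID
$>xcall.sh cat /home/centos/hadoop/ha/dfs/data1/current/VERSION # datanodes must show the same clusterID
A clusterID mismatch is the classic reason datanodes fail to register after a re-format.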
5. Copy the namenode metadata from s101 to s105
[s101]
scp -r /home/centos/hadoop/ha centos@s105:/home/centos/hadoop
6. Bootstrap on s105
6.1) The namenode on s101 must be started first
[s101]
hadoop-daemon.sh start namenode
6.2) Run the bootstrap
[s105]
hdfs namenode -bootstrapStandby
Note: do not re-format; if asked whether to re-format, answer N.
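To confirm the bootstrap (or the scp in step 5) worked, the metadata on s105 should now carry the same clusterID as s101:
[s105]
$>cat /home/centos/hadoop/ha/dfs/name1/current/VERSION      # clusterID must match the one on s101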
7) Initialize the edit logs into the JN cluster
[run on a node whose namenode has not been started]
$>hdfs namenode -initializeSharedEdits
Note: if asked whether to re-format, answer N.
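To confirm the shared edits were initialized, each JN node should now have a directory named after the nameservice under dfs.journalnode.edits.dir:
$>for h in s102 s103 s104; do ssh $h ls /home/centos/hadoop/ha/journalnode; done    # expect: mycluster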
8) Admin commands
# switch state
$>hdfs haadmin -transitionToActive nn1 --forcemanual
# query state
$>hdfs haadmin -getServiceState nn1
# failover drill (fail over from nn1 to nn2)
$>hdfs haadmin -failover nn1 nn2
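After the failover, the roles should have swapped; this can be confirmed from any node:
$>hdfs haadmin -getServiceState nn1        # expect: standby
$>hdfs haadmin -getServiceState nn2        # expect: active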
9) Automatic failover configuration
9.1) Two components are introduced
a)zk quorum
b)zkfc
zkfc runs alongside each NN as a companion process (a bodyguard, so to speak) and has the following three responsibilities (an example of the resulting ZooKeeper state follows this list):
1. Monitor the health of the NN
Periodically pings the NN; receiving a response means healthy, otherwise unhealthy.
2. ZK session management
3. ZK-based leader election
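Once zkfc has been formatted and started (step 9.3 below), the election state it maintains can be inspected directly in ZooKeeper; the znode names below are the Hadoop defaults for this nameservice:
$>zkCli.sh -server s102:2181
ls /hadoop-ha/mycluster                                # expect: [ActiveBreadCrumb, ActiveStandbyElectorLock]
get /hadoop-ha/mycluster/ActiveStandbyElectorLock      # shows which NN currently holds the active lock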
9.2) Configure automatic failover
(omitted here; already covered above by ha.zookeeper.quorum in core-site.xml and dfs.ha.automatic-failover.enabled in hdfs-site.xml)
9.3) Format zkfc and start it
$>hdfs zkfc -formatZK
$>hadoop-daemon.sh start zkfc
Test
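A simple drill, assuming the whole cluster (ZK, JN, NN, DN, zkfc) is running: kill the active namenode and confirm the standby is promoted automatically.
[s101, the active NN]
$>jps                                          # find the NameNode pid
$>kill -9 <NameNode pid>                       # simulate a crash of the active namenode
[any node]
$>hdfs haadmin -getServiceState nn2            # expect: active after a short delay
[s101]
$>hadoop-daemon.sh start namenode              # the restarted namenode should rejoin as standby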