Hadoop HA
According to the official documentation, Hadoop HA really means HA for each component separately, i.e. HDFS HA and YARN HA.
HDFS HA itself comes in two flavors, QJM and NFS:
the QJM approach uses JournalNodes to share the edits files, while the NFS approach shares them through a remote directory exported over NFS.
Configuring and starting NFS-based HA is largely the same as QJM; the only difference is how the active and standby NameNodes share the edits files.
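For reference, the one property that differs between the two is dfs.namenode.shared.edits.dir in hdfs-site.xml; the hostnames and NFS path below are illustrative, not taken from this cluster:
<!-- QJM: point at a quorum of JournalNodes (default port 8485) -->
<property>
  <name>dfs.namenode.shared.edits.dir</name>
  <value>qjournal://node1:8485;node2:8485;node3:8485/gisbdp</value>
</property>
<!-- NFS: point at a shared directory mounted on both NameNodes -->
<property>
  <name>dfs.namenode.shared.edits.dir</name>
  <value>file:///mnt/filer1/dfs/ha-name-dir-shared</value>
</property>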
Official documentation: http://hadoop.apache.org/docs/r2.7.7/
Next, we deploy HDFS HA (QJM) and YARN HA.
Basic steps
# Distribute the Hadoop tarball to /home/hadoop/app/ on every node in the bdp group
ansible bdp -m copy -a 'src=/home/hadoop/app/hadoop-2.7.7.tar.gz dest=/home/hadoop/app owner=hadoop group=hadoop'
# Extract hadoop-2.7.7.tar.gz into /app/gisbdp/
ansible bdp -m shell -a 'tar -xzvf /home/hadoop/app/hadoop-2.7.7.tar.gz -C /app/gisbdp/'
# Create a symlink
ansible bdp -m shell -a 'ln -s /app/gisbdp/hadoop-2.7.7 /app/gisbdp/hadoop'
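# A sanity check that the archive unpacked and the symlink resolves on every
# node (a suggested check, not part of the original steps):
ansible bdp -m shell -a 'ls -ld /app/gisbdp/hadoop'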
# Add the following Hadoop settings to ~/.bashrc
#set hadoop environment
export HADOOP_HOME=/app/gisbdp/hadoop
export HADOOP_CONF_DIR=$HADOOP_HOME/etc/hadoop
export HADOOP_COMMON_HOME=$HADOOP_HOME
export HADOOP_HDFS_HOME=$HADOOP_HOME
export HADOOP_MAPRED_HOME=$HADOOP_HOME
export HADOOP_YARN_HOME=$HADOOP_HOME
export HADOOP_COMMON_LIB_NATIVE_DIR=$HADOOP_HOME/lib/native
export HADOOP_OPTS="-Djava.library.path=$HADOOP_HOME/lib"
export PATH=$PATH:$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$HADOOP_HOME/lib
# Push the updated .bashrc to all nodes
ansible bdp -m copy -a 'src=/home/hadoop/.bashrc dest=/home/hadoop/.bashrc'
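# A quick way to confirm the environment took effect on every node (a
# suggested check, assuming ~/.bashrc does not return early for
# non-interactive shells):
ansible bdp -m shell -a '. ~/.bashrc && hadoop version'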
# To make distribution with Ansible easier, keep a working copy of the config files from /app/gisbdp/hadoop/etc/hadoop/ under /home/hadoop/app/hadoop-2.7.7/etc/hadoop/
# On node1
[hadoop@node1 etc]$ cp -R /app/gisbdp/hadoop/etc/hadoop /home/hadoop/app/hadoop-2.7.7/etc/
# Create the data directory on every node
ansible bdp -m file -a 'path=/data state=directory owner=hadoop group=hadoop mode=755 force=yes'
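# core-site.xml below sets hadoop.tmp.dir to /data/hdfs/tmp. Hadoop creates
# that directory itself on first start as long as /data is writable by the
# hadoop user, but it can also be pre-created up front (optional, not part of
# the original steps):
ansible bdp -m file -a 'path=/data/hdfs/tmp state=directory owner=hadoop group=hadoop mode=755'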
Main hadoop-conf configuration files
# Edit the hadoop-conf files; the ones to change under $HADOOP_CONF_DIR are:
yarn-site.xml mapred-site.xml hdfs-site.xml core-site.xml fair-scheduler.xml slaves
# plus hosts.include / hosts.exclude, rmhosts.include / rmhosts.exclude and hadoop-env.sh, which are distributed below as well
# Distribute the hadoop-conf files
ansible bdp -m copy -a 'src=/home/hadoop/app/hadoop-2.7.7/etc/hadoop/yarn-site.xml dest=/app/gisbdp/hadoop/etc/hadoop owner=hadoop group=hadoop'
ansible bdp -m copy -a 'src=/home/hadoop/app/hadoop-2.7.7/etc/hadoop/mapred-site.xml dest=/app/gisbdp/hadoop/etc/hadoop owner=hadoop group=hadoop'
ansible bdp -m copy -a 'src=/home/hadoop/app/hadoop-2.7.7/etc/hadoop/hdfs-site.xml dest=/app/gisbdp/hadoop/etc/hadoop owner=hadoop group=hadoop'
ansible bdp -m copy -a 'src=/home/hadoop/app/hadoop-2.7.7/etc/hadoop/core-site.xml dest=/app/gisbdp/hadoop/etc/hadoop owner=hadoop group=hadoop'
ansible bdp -m copy -a 'src=/home/hadoop/app/hadoop-2.7.7/etc/hadoop/fair-scheduler.xml dest=/app/gisbdp/hadoop/etc/hadoop owner=hadoop group=hadoop'
ansible bdp -m copy -a 'src=/home/hadoop/app/hadoop-2.7.7/etc/hadoop/slaves dest=/app/gisbdp/hadoop/etc/hadoop owner=hadoop group=hadoop'
ansible bdp -m copy -a 'src=/home/hadoop/app/hadoop-2.7.7/etc/hadoop/hosts.include dest=/app/gisbdp/hadoop/etc/hadoop owner=hadoop group=hadoop'
ansible bdp -m copy -a 'src=/home/hadoop/app/hadoop-2.7.7/etc/hadoop/hosts.exclude dest=/app/gisbdp/hadoop/etc/hadoop owner=hadoop group=hadoop'
ansible bdp -m copy -a 'src=/home/hadoop/app/hadoop-2.7.7/etc/hadoop/rmhosts.include dest=/app/gisbdp/hadoop/etc/hadoop owner=hadoop group=hadoop'
ansible bdp -m copy -a 'src=/home/hadoop/app/hadoop-2.7.7/etc/hadoop/rmhosts.exclude dest=/app/gisbdp/hadoop/etc/hadoop owner=hadoop group=hadoop'
ansible bdp -m copy -a 'src=/home/hadoop/app/hadoop-2.7.7/etc/hadoop/hadoop-env.sh dest=/app/gisbdp/hadoop/etc/hadoop owner=hadoop group=hadoop'
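# The eleven copy tasks above can also be collapsed into a single recursive
# copy: with a trailing slash on src, ansible's copy module pushes the
# directory's contents rather than the directory itself. An alternative, not
# what the original ran; it pushes every file in the staging directory, so it
# is only equivalent if that copy holds exactly the files you want live:
ansible bdp -m copy -a 'src=/home/hadoop/app/hadoop-2.7.7/etc/hadoop/ dest=/app/gisbdp/hadoop/etc/hadoop/ owner=hadoop group=hadoop'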
core-site.xml
<configuration>
  <!-- Set the HDFS nameservice to gisbdp -->
  <property>
    <name>fs.defaultFS</name>
    <value>hdfs://gisbdp</value>
  </property>
  <!-- Base directory for Hadoop's temporary files -->
  <property>
    <name>hadoop.tmp.dir</name>
    <value>/data/hdfs/tmp</value>
  </property>
  <!-- ZooKeeper quorum (used by the ZKFC for automatic failover) -->
  <property>
    <name>ha.zookeeper.quorum</name>
    <value>node3:2181,node4:2181,node5:2181</value>
  </property>
  <!-- Keep deleted files in the trash for 2880 minutes (48 hours) -->
  <property>
    <name>fs.trash.interval</name>
    <value>2880</value>
  </property>
  <!-- Rack-awareness topology script -->
  <property>
    <name>net.topology.script.file.name</name>
    <value>/app/hadoop-conf/rack.sh</value>
  </property>
  <!-- Read/write buffer size: 128 KB -->
  <property>
    <name>io.file.buffer.size</name>
    <value>131072</value>
  </property>
  <!-- Available compression codecs -->
  <property>
    <name>io.compression.codecs</name>
    <value>org.apache.hadoop.io.compress.SnappyCodec,org.apache.hadoop.io.compress.DefaultCodec,org.apache.hadoop.io.compress.GzipCodec,org.apache.hadoop.io.compress.BZip2Codec,org.apache.hadoop.io.compress.Lz4Codec</value>
  </property>
  <!-- Allow the hadoop, hdfs and hive users to impersonate any user from any host -->
  <property>
    <name>hadoop.proxyuser.hadoop.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hadoop.groups</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hdfs.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hdfs.groups</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hive.hosts</name>
    <value>*</value>
  </property>
  <property>
    <name>hadoop.proxyuser.hive.groups</name>
    <value>*</value>
  </property>
</configuration>
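The net.topology.script.file.name property above points at /app/hadoop-conf/rack.sh, which this post never shows. For reference, Hadoop invokes the topology script with one or more hostnames/IPs as arguments and expects one rack path per argument on stdout; a minimal sketch, with a purely illustrative node-to-rack mapping:
#!/bin/bash
# Minimal rack-awareness sketch; the real /app/hadoop-conf/rack.sh is not
# shown in this post and this node-to-rack mapping is an assumption.
while [ $# -gt 0 ]; do
  case "$1" in
    node1|node2)       echo "/rack1" ;;
    node3|node4|node5) echo "/rack2" ;;
    *)                 echo "/default-rack" ;;
  esac
  shift
done
Once core-site.xml is distributed, hdfs getconf -confKey fs.defaultFS should print hdfs://gisbdp on any node.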
mapred-site.xml
<configuration>
  <!-- Run MapReduce on YARN -->
  <property>
    <name>mapreduce.framework.name</name>
    <value>yarn</value>
  </property>
  <!-- JobHistory server RPC, admin and web UI addresses -->
  <property>
    <name>mapreduce.jobhistory.address</name>
    <value>node1:10020</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.admin.address</name>
    <value>node1:10033</value>
  </property>
  <property>
    <name>mapreduce.jobhistory.webapp.address</name>
    <value>node1:19888</value>
  </property>