I. Preparation
1. Version
hadoop-3.3.6
2. Service roles
| No. | Host | Roles | Remark |
| --- | --- | --- | --- |
| 1 | hadoop01 | NN, JN, DN, RM, NM, JHS | |
| 2 | hadoop02 | NN, JN, DN, RM, NM | |
| 3 | hadoop03 | JN, DN, NM | |
| 4 | hadoop04 | DN, NM | |

(NN = NameNode, JN = JournalNode, DN = DataNode, RM = ResourceManager, NM = NodeManager, JHS = JobHistoryServer)
3. Create the service user (on every node)
useradd hdfs
4. Passwordless SSH (run as the hdfs user)
ssh-keygen -b 1024 -t rsa
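Generating the key pair alone does not give passwordless login; the public key still has to be distributed. A minimal sketch, run as the hdfs user on both NN nodes (hostnames per the role table; sshd on port 22, matching the sshfence configuration below):
# push the public key to every node in the cluster
ssh-copy-id hdfs@hadoop01.jedy.com.cn
ssh-copy-id hdfs@hadoop02.jedy.com.cn
ssh-copy-id hdfs@hadoop03.jedy.com.cn
ssh-copy-id hdfs@hadoop04.jedy.com.cn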
5. Add DNS records or /etc/hosts entries for every cluster host
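For example, /etc/hosts on every node might contain entries like the following (the IP addresses are placeholders; substitute your own):
192.168.0.101 hadoop01.jedy.com.cn hadoop01
192.168.0.102 hadoop02.jedy.com.cn hadoop02
192.168.0.103 hadoop03.jedy.com.cn hadoop03
192.168.0.104 hadoop04.jedy.com.cn hadoop04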
II. Deployment
1. Installation
cd /BigData/install
tar xvf hadoop-3.3.6.tar.gz
ln -snf /BigData/install/hadoop-3.3.6 /BigData/run/hadoop
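The daemons will run as the hdfs user created earlier, and the configs below keep their data under /data/store/hadoop, so ownership and directories likely need preparing first; a sketch:
# hand the installation and data trees to the hdfs user
chown -R hdfs:hdfs /BigData/install/hadoop-3.3.6
mkdir -p /data/store/hadoop/run
chown -R hdfs:hdfs /data/store/hadoop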
2. Environment variables
cat >> /etc/profile.d/hadoop.sh << 'EOF'
#HADOOP
export HADOOP_HOME=/BigData/run/hadoop
export HADOOP_COMMON_HOME=${HADOOP_HOME}
export HADOOP_HDFS_HOME=${HADOOP_HOME}
export YARN_HOME=${HADOOP_HOME}
export HADOOP_YARN_HOME=${HADOOP_HOME}
export HADOOP_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export HDFS_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export YARN_CONF_DIR=${HADOOP_HOME}/etc/hadoop
export PATH=$HADOOP_HOME/bin:$HADOOP_HOME/sbin:$PATH
export HADOOP_CMD=${HADOOP_HOME}/bin/hadoop
export HADOOP_STREAMING=$HADOOP_HOME/share/hadoop/tools/lib/hadoop-streaming-3.3.6.jar
export LD_LIBRARY_PATH=/usr/lib64:$LD_LIBRARY_PATH
export HADOOP_CLASSPATH=`hadoop classpath`
EOF
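The quoted delimiter ('EOF') keeps ${HADOOP_HOME} and the backtick expression from being expanded while the file is written; they are evaluated at login instead. Reload the profile so the variables take effect in the current shell:
source /etc/profile.d/hadoop.sh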
3. Configuration
core-site.xml
[hdfs@hadoop01 ~]$ cat $HADOOP_HOME/etc/hadoop/core-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>fs.defaultFS</name>
<value>hdfs://jedy</value>
</property>
<property>
<name>ha.zookeeper.quorum</name>
<value>hadoop01.jedy.com.cn:12181,hadoop02.jedy.com.cn:12181,hadoop03.jedy.com.cn:12181</value>
</property>
<property>
<name>hadoop.tmp.dir</name>
<value>/data/store/hadoop/hdfs/tmp</value>
</property>
<property>
<name>io.compression.codecs</name>
<value>org.apache.hadoop.io.compress.GzipCodec,
org.apache.hadoop.io.compress.DefaultCodec,
org.apache.hadoop.io.compress.DeflateCodec,
com.hadoop.compression.lzo.LzoCodec,
com.hadoop.compression.lzo.LzopCodec,
org.apache.hadoop.io.compress.BZip2Codec,
org.apache.hadoop.io.compress.SnappyCodec,
org.apache.hadoop.io.compress.ZStandardCodec,
org.apache.hadoop.io.compress.Lz4Codec
</value>
</property>
<property>
<name>io.compression.codec.lzo.class</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
<property>
<name>hadoop.proxyuser.root.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.root.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hdfs.groups</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hdfs.hosts</name>
<value>*</value>
</property>
</configuration>
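Note that the com.hadoop.compression.lzo.* codecs come from the third-party hadoop-lzo project and are not bundled with Apache Hadoop; the jar and its native library must be installed separately (e.g. under /usr/lib64, which LD_LIBRARY_PATH above points at). Native codec support can be checked with:
hadoop checknative -a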
hdfs-site.xml
[hdfs@hadoop01 ~]$ cat $HADOOP_HOME/etc/hadoop/hdfs-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<property>
<name>dfs.nameservices</name>
<value>jedy</value>
</property>
<property>
<name>dfs.ha.namenodes.jedy</name>
<value>nn1,nn2</value>
</property>
<property>
<name>dfs.namenode.rpc-address.jedy.nn1</name>
<value>hadoop01.jedy.com.cn:8020</value>
</property>
<property>
<name>dfs.namenode.rpc-address.jedy.nn2</name>
<value>hadoop02.jedy.com.cn:8020</value>
</property>
<property>
<name>dfs.namenode.http-address.jedy.nn1</name>
<value>hadoop01.jedy.com.cn:50070</value>
</property>
<property>
<name>dfs.namenode.http-address.jedy.nn2</name>
<value>hadoop02.jedy.com.cn:50070</value>
</property>
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://hadoop01.jedy.com.cn:8485;hadoop02.jedy.com.cn:8485;hadoop03.jedy.com.cn:8485/jedy</value>
</property>
<property>
<name>dfs.client.failover.proxy.provider.jedy</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<property>
<name>dfs.namenode.datanode.registration.ip-hostname-check</name>
<value>false</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>/data/store/hadoop/hdfs/datanode</value>
</property>
<property>
<name>dfs.namenode.name.dir</name>
<value>/data/store/hadoop/hdfs/namenode</value>
</property>
<property>
<name>dfs.permissions.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.webhdfs.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.datanode.socket.write.timeout</name>
<value>18000000</value>
</property>
<property>
<name>dfs.socket.timeout</name>
<value>18000000</value>
</property>
<property>
<name>dfs.support.append</name>
<value>true</value>
</property>
<property>
<name>dfs.ha.fencing.methods</name>
<value>sshfence(hdfs:22)
shell(/bin/true)
</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hdfs/.ssh/id_rsa</value>
</property>
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data/store/hadoop/journalnode</value>
</property>
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.datanode.hdfs-blocks-metadata.enabled</name>
<value>true</value>
</property>
<property>
<name>dfs.client.read.shortcircuit</name>
<value>true</value>
</property>
<property>
<name>dfs.domain.socket.path</name>
<value>/data/store/hadoop/run/dfssocket</value>
</property>
<property>
<name>dfs.client.use.datanode.hostname</name>
<value>true</value>
</property>
<property>
<name>dfs.datanode.use.datanode.hostname</name>
<value>true</value>
</property>
<property>
<name>dfs.hosts.exclude</name>
<value>/BigData/run/hadoop/etc/hadoop/excludes</value>
</property>
<property>
<name>dfs.namenode.support.allow.format</name>
<value>false</value>
</property>
<property>
<name>dfs.namenode.acls.enabled</name>
<value>true</value>
</property>
</configuration>
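dfs.hosts.exclude above points at /BigData/run/hadoop/etc/hadoop/excludes; the NameNode may refuse to start if that file is missing, so it is safest to create empty placeholders up front (the YARN counterpart is used in Section VIII):
touch /BigData/run/hadoop/etc/hadoop/excludes /BigData/run/hadoop/etc/hadoop/yarn_excludes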
yarn-site.xml
[hdfs@hadoop01 ~]$ cat $HADOOP_HOME/etc/hadoop/yarn-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<configuration>
<property>
<description>Enable the web proxy connection timeout, default is enabled.</description>
<name>yarn.resourcemanager.proxy.timeout.enabled</name>
<value>true</value>
</property>
<property>
<description>The web proxy connection timeout.</description>
<name>yarn.resourcemanager.proxy.connection.timeout</name>
<value>60000</value>
</property>
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>jedy</value>
</property>
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>hadoop01.jedy.com.cn:12181,hadoop02.jedy.com.cn:12181,hadoop03.jedy.com.cn:12181</value>
</property>
<property>
<name>yarn.resourcemanager.ha.automatic-failover.zk-base-path</name>
<value>/yarn-leader-election</value>
</property>
<property>
<name>yarn.resourcemanager.zk-state-store.parent-path</name>
<value>/rmstore</value>
</property>
<property>
<name>yarn.nodemanager.remote-app-log-dir</name>
<value>/tmp/logs</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>hadoop01.jedy.com.cn</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>hadoop02.jedy.com.cn</value>
</property>
<property>
<name>yarn.resourcemanager.address.rm1</name>
<value>hadoop01.jedy.com.cn:8032</value>
</property>
<property>
<name>yarn.resourcemanager.address.rm2</name>
<value>hadoop02.jedy.com.cn:8032</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm1</name>
<value>hadoop01.jedy.com.cn:8030</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.address.rm2</name>
<value>hadoop02.jedy.com.cn:8030</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm1</name>
<value>hadoop01.jedy.com.cn:8031</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.address.rm2</name>
<value>hadoop02.jedy.com.cn:8031</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm1</name>
<value>hadoop01.jedy.com.cn:8033</value>
</property>
<property>
<name>yarn.resourcemanager.admin.address.rm2</name>
<value>hadoop02.jedy.com.cn:8033</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>hadoop01.jedy.com.cn:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>hadoop02.jedy.com.cn:8088</value>
</property>
<property>
<name>yarn.nodemanager.resource.memory-mb</name>
<value>204800</value>
</property>
<property>
<name>yarn.nodemanager.resource.cpu-vcores</name>
<value>50</value>
</property>
<property>
<name>yarn.nodemanager.local-dirs</name>
<value>/data/store/hadoop/yarn/nodemanager/local</value>
</property>
<property>
<name>yarn.nodemanager.log-dirs</name>
<value>/data/store/hadoop/yarn/nodemanager/logs</value>
</property>
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<property>
<name>yarn.resourcemanager.recovery.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.bind-host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>yarn.nodemanager.bind-host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>yarn.timeline-service.bind-host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>yarn.application.classpath</name>
<value>$HADOOP_CONF_DIR,
$HADOOP_COMMON_HOME/share/hadoop/common/*,$HADOOP_COMMON_HOME/share/hadoop/common/lib/*,
$HADOOP_HDFS_HOME/share/hadoop/hdfs/*,$HADOOP_HDFS_HOME/share/hadoop/hdfs/lib/*,
$HADOOP_YARN_HOME/share/hadoop/yarn/*,$HADOOP_YARN_HOME/share/hadoop/yarn/lib/*,
$HADOOP_HOME/share/hadoop/mapreduce/*,$HADOOP_HOME/share/hadoop/mapreduce/lib/*,
$HADOOP_HOME/share/hadoop/tools/*,$HADOOP_HOME/share/hadoop/tools/lib/*,
$HADOOP_HOME/lib/*
</value>
</property>
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<property>
<name>yarn.nodemanager.aux-services.mapreduce_shuffle.class</name>
<value>org.apache.hadoop.mapred.ShuffleHandler</value>
</property>
<property>
<name>yarn.log-aggregation-enable</name>
<value>true</value>
<final>true</final>
</property>
<property>
<name>yarn.log-aggregation.retain-seconds</name>
<value>2592000</value>
</property>
<property>
<name>yarn.log-aggregation.retain-check-interval-seconds</name>
<value>-1</value>
</property>
<property>
<name>yarn.resourcemanager.resource-tracker.client.thread-count</name>
<value>100</value>
</property>
<property>
<name>yarn.resourcemanager.scheduler.client.thread-count</name>
<value>100</value>
</property>
<property>
<name>yarn.scheduler.increment-allocation-mb</name>
<value>512</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-mb</name>
<value>1024</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-mb</name>
<value>40960</value>
</property>
<property>
<name>yarn.scheduler.minimum-allocation-vcores</name>
<value>1</value>
</property>
<property>
<name>yarn.scheduler.maximum-allocation-vcores</name>
<value>20</value>
</property>
<property>
<name>yarn.scheduler.fair.allow-undeclared-pools</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.vmem-pmem-ratio</name>
<value>3.5</value>
</property>
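<!-- Note: the CapacityScheduler is configured below, so the
yarn.scheduler.fair.* properties in this file (and the FairScheduler-only
yarn.scheduler.increment-allocation-mb) are ignored; they only take
effect if the scheduler class is switched to the FairScheduler. -->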
<property>
<name>yarn.resourcemanager.scheduler.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.scheduler.capacity.CapacityScheduler</value>
</property>
<property>
<name>yarn.scheduler.fair.allocation.file</name>
<value>fair-scheduler.xml</value>
</property>
<property>
<name>yarn.resourcemanager.client.thread-count</name>
<value>50</value>
</property>
<property>
<name>yarn.nodemanager.localizer.client.thread-count</name>
<value>20</value>
</property>
<property>
<name>yarn.resourcemanager.max-completed-applications</name>
<value>10000</value>
</property>
<property>
<name>yarn.nodemanager.localizer.fetch.thread-count</name>
<value>16</value>
</property>
<property>
<name>yarn.nodemanager.pmem-check-enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.nodemanager.vmem-check-enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.amliveliness-monitor.interval-ms</name>
<value>10000</value>
</property>
<property>
<name>yarn.resourcemanager.store.class</name>
<value>org.apache.hadoop.yarn.server.resourcemanager.recovery.ZKRMStateStore</value>
</property>
<property>
<name>yarn.scheduler.fair.assignmultiple</name>
<value>true</value>
</property>
<property>
<name>yarn.scheduler.fair.max.assign</name>
<value>5</value>
</property>
<property>
<name>yarn.scheduler.fair.user-as-default-queue</name>
<value>false</value>
</property>
<property>
<name>yarn.nodemanager.disk-health-checker.min-healthy-disks</name>
<value>1</value>
</property>
<property>
<name>yarn.nodemanager.disk-health-checker.max-disk-utilization-per-disk-percentage</name>
<value>97</value>
</property>
<property>
<name>yarn.nodemanager.disk-health-checker.min-free-space-per-disk-mb</name>
<value>204800</value>
</property>
<property>
<name>yarn.nodemanager.health-checker.scripts</name>
<value>custom-disk-check</value>
</property>
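<!-- Note: each script named in yarn.nodemanager.health-checker.scripts also
needs a yarn.nodemanager.health-checker.custom-disk-check.path property
pointing at the executable script; that path is not defined in this file. -->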
<property>
<name>yarn.nodemanager.health-checker.custom-disk-check.interval-ms</name>
<value>600000</value>
</property>
<property>
<name>yarn.nodemanager.health-checker.custom-disk-check.timeout-ms</name>
<value>120000</value>
</property>
<property>
<name>yarn.resourcemanager.nodemanagers.heartbeat-interval-ms</name>
<value>1000</value>
</property>
<property>
<name>yarn.resourcemanager.am.max-attempts</name>
<value>3</value>
</property>
<property>
<name>yarn.resourcemanager.work-preserving-recovery.enabled</name>
<value>true</value>
</property>
<property>
<name>yarn.resourcemanager.zk-retry-interval-ms</name>
<value>5000</value>
</property>
<property>
<name>yarn.resourcemanager.zk-num-retries</name>
<value>20</value>
</property>
<property>
<name>yarn.cluster.max-application-priority</name>
<value>15</value>
</property>
<property>
<name>yarn.resourcemanager.nodes.exclude-path</name>
<value>/BigData/run/hadoop/etc/hadoop/yarn_excludes</value>
</property>
</configuration>
mapred-site.xml
[hdfs@hadoop01 ~]$ cat $HADOOP_HOME/etc/hadoop/mapred-site.xml
<?xml version="1.0" encoding="UTF-8"?>
<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
<configuration>
<property>
<name>mapreduce.jobhistory.address</name>
<value>hadoop01.jedy.com.cn:10020</value>
</property>
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>hadoop01.jedy.com.cn:19888</value>
</property>
<property>
<name>mapreduce.jobhistory.admin.address</name>
<value>0.0.0.0:10033</value>
</property>
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<property>
<name>mapreduce.jobhistory.bind-host</name>
<value>0.0.0.0</value>
</property>
<property>
<name>yarn.app.mapreduce.am.resource.mb</name>
<value>2048</value>
</property>
<property>
<name>mapreduce.map.memory.mb</name>
<value>2560</value>
</property>
<property>
<name>mapreduce.reduce.memory.mb</name>
<value>2560</value>
</property>
<property>
<name>mapreduce.map.java.opts</name>
<value>-Xmx2048m</value>
</property>
<property>
<name>mapreduce.reduce.java.opts</name>
<value>-Xmx2048m</value>
</property>
<property>
<name>mapreduce.jobhistory.intermediate-done-dir</name>
<value>${yarn.app.mapreduce.am.staging-dir}/history/done_intermediate</value>
</property>
<property>
<name>mapreduce.jobhistory.done-dir</name>
<value>${yarn.app.mapreduce.am.staging-dir}/history/done</value>
</property>
<property>
<name>mapreduce.jobhistory.move.interval-ms</name>
<value>180000</value>
</property>
<property>
<name>mapreduce.jobhistory.move.thread-count</name>
<value>5</value>
</property>
<property>
<name>mapreduce.jobhistory.cleaner.enable</name>
<value>true</value>
</property>
<property>
<name>mapreduce.jobhistory.cleaner.interval-ms</name>
<value>86400000</value>
</property>
<property>
<name>mapreduce.jobhistory.max-age-ms</name>
<value>604800000</value>
</property>
<property>
<name>mapreduce.jobhistory.client.thread-count</name>
<value>20</value>
</property>
<property>
<name>mapreduce.jobhistory.datestring.cache.size</name>
<value>200000</value>
</property>
<property>
<name>mapreduce.jobhistory.joblist.cache.size</name>
<value>160000</value>
</property>
<property>
<name>mapreduce.jobhistory.loadedjobs.cache.size</name>
<value>20</value>
</property>
<property>
<name>mapreduce.client.submit.file.replication</name>
<value>30</value>
</property>
<property>
<name>mapreduce.map.failures.maxpercent</name>
<value>0</value>
</property>
<property>
<name>mapreduce.reduce.failures.maxpercent</name>
<value>0</value>
</property>
<property>
<name>mapreduce.map.maxattempts</name>
<value>3</value>
</property>
<property>
<name>mapreduce.reduce.maxattempts</name>
<value>3</value>
</property>
<property>
<name>mapreduce.task.io.sort.mb</name>
<value>100</value>
</property>
<property>
<name>mapreduce.job.reduce.slowstart.completedmaps</name>
<value>0.99</value>
</property>
<property>
<name>mapreduce.map.output.compress</name>
<value>true</value>
</property>
<property>
<name>mapreduce.map.output.compress.codec</name>
<value>com.hadoop.compression.lzo.LzoCodec</value>
</property>
<property>
<name>mapred.child.env</name>
<value>LD_LIBRARY_PATH=/usr/lib64</value>
</property>
<property>
<name>mapreduce.job.counters.max</name>
<value>1000</value>
</property>
<property>
<name>mapreduce.job.max.split.locations</name>
<value>500</value>
</property>
<property>
<name>mapreduce.job.queuename</name>
<value>root.default</value>
</property>
</configuration>
III. NameNode initialization (first start only)
1. Start ZooKeeper
For ZooKeeper deployment, see the CSDN post "Zookeeper 多实例安装" (multi-instance ZooKeeper installation).
On every ZK node, run (if not already started):
zkServer.sh start
2. Format the ZK znode (needed only the first time)
On any one node with the Hadoop configuration in place, run:
hdfs zkfc -formatZK
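To confirm the HA znode was created, it can be inspected from any ZK node (client port 12181 per core-site.xml); a sketch, assuming zkCli.sh is on the PATH:
zkCli.sh -server hadoop01.jedy.com.cn:12181 ls /hadoop-ha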
3. Start the ZKFCs
On both NN nodes:
hdfs --daemon start zkfc
4. Start the JournalNodes
On every JN node:
hdfs --daemon start journalnode
5. Format the NameNode
Note: format only on the very first start of the cluster; never format again! Also note that hdfs-site.xml above sets dfs.namenode.support.allow.format to false, which makes the format command abort; temporarily set it to true (or remove it) for this one-time step, then restore it.
hdfs namenode -format
6. Start the primary NN (on the primary NN node):
hdfs --daemon start namenode
7. Sync the primary NN's metadata to the standby (run on the standby NN node):
hdfs namenode -bootstrapStandby
8. Start the standby NN
On the standby NN node:
hdfs --daemon start namenode
9. Make nn1 active
(This step can be skipped: with automatic failover enabled, ZK has already elected an active NN. In fact, haadmin refuses manual state transitions in this mode unless --forcemanual is added.)
hdfs haadmin -transitionToActive nn1
10. Verify automatic active/standby failover
hdfs haadmin -failover nn1 nn2
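Either way, the active/standby roles can be checked at any time with:
hdfs haadmin -getServiceState nn1
hdfs haadmin -getServiceState nn2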
IV. Routine startup
Start ZKFC
On both NN nodes:
su - hdfs -c 'hdfs --daemon start zkfc'
Start JN
On every JN node:
su - hdfs -c 'hdfs --daemon start journalnode'
Start NN
On both NN nodes:
su - hdfs -c 'hdfs --daemon start namenode'
Start DN (on every DN node):
su - hdfs -c 'hdfs --daemon start datanode'
Start RM (on both RM nodes):
su - hdfs -c 'yarn --daemon start resourcemanager'
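Once both ResourceManagers are up, their HA state can be verified the same way as for the NameNodes:
su - hdfs -c 'yarn rmadmin -getServiceState rm1'
su - hdfs -c 'yarn rmadmin -getServiceState rm2'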
Start NM (on every NM node):
su - hdfs -c 'yarn --daemon start nodemanager'
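Start JHS
The role table assigns a JobHistoryServer (jhs) to hadoop01, and mapred-site.xml points mapreduce.jobhistory.address at it, but no start command appears above; on hadoop01 it would presumably be:
su - hdfs -c 'mapred --daemon start historyserver'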
V. Verification
hadoop jar $HADOOP_HOME/share/hadoop/mapreduce/hadoop-mapreduce-examples-3.3.6.jar pi 20 20
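A quick HDFS smoke test alongside the MapReduce job (the /tmp/smoke path is an arbitrary example):
su - hdfs -c 'hdfs dfsadmin -report'
su - hdfs -c 'hdfs dfs -mkdir -p /tmp/smoke'
su - hdfs -c 'hdfs dfs -put /etc/hosts /tmp/smoke/'
su - hdfs -c 'hdfs dfs -cat /tmp/smoke/hosts'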
VI. Web UIs
NameNode: http://hadoop01.jedy.com.cn:50070 (port per dfs.namenode.http-address above)
ResourceManager: http://hadoop01.jedy.com.cn:8088
VII. Scaling out
1. Node deployment
Repeat the installation, environment-variable, and configuration steps from Section II on the new node.
2. Start the services
Start DN:
su - hdfs -c 'hdfs --daemon start datanode'
Start NM:
su - hdfs -c 'yarn --daemon start nodemanager'
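Afterwards, confirm the new node has registered with both the NameNode and the ResourceManager:
su - hdfs -c 'hdfs dfsadmin -report'
su - hdfs -c 'yarn node -list'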
VIII. Decommissioning
1. Update the exclude files
Add the hostnames of the nodes to be decommissioned to:
$HADOOP_HOME/etc/hadoop/excludes
$HADOOP_HOME/etc/hadoop/yarn_excludes
2. Refresh the NameNode
su - hdfs -c "hdfs dfsadmin -refreshNodes"
3. Refresh the ResourceManager
su - hdfs -c "yarn rmadmin -refreshNodes"
4. Stop the services
Once http://hadoop01.jedy.com.cn:50070/dfshealth.html#tab-datanode shows the node as Decommissioned, stop its services.
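Once decommissioning has finished in both HDFS and YARN, stop the node's daemons (the mirror of the start commands above):
su - hdfs -c 'hdfs --daemon stop datanode'
su - hdfs -c 'yarn --daemon stop nodemanager'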