基础配置
切换清华源
备份:将 /etc/yum.repos.d/CentOS-Base.repo 复制为 CentOS-Base.repo.backup
cp /etc/yum.repos.d/CentOS-Base.repo /etc/yum.repos.d/CentOS-Base.repo.backup
编辑 /etc/yum.repos.d/CentOS-Base.repo,内容如下:
# CentOS-Base.repo — pointed at the TUNA (Tsinghua) mirror: the stock
# mirrorlist= lines are commented out and baseurl= pinned to the mirror.
#
# The mirror system uses the connecting IP address of the client and the
# update status of each mirror to pick mirrors that are updated to and
# geographically close to the client. You should use this for CentOS updates
# unless you are manually picking other mirrors.
#
# If the mirrorlist= does not work for you, as a fall back you can try the
# remarked out baseurl= line instead.
#
#
[base]
name=CentOS-$releasever - Base
baseurl=https://mirrors.tuna.tsinghua.edu.cn/centos/$releasever/os/$basearch/
#mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=os
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7
#released updates
[updates]
name=CentOS-$releasever - Updates
baseurl=https://mirrors.tuna.tsinghua.edu.cn/centos/$releasever/updates/$basearch/
#mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=updates
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7
#additional packages that may be useful
[extras]
name=CentOS-$releasever - Extras
baseurl=https://mirrors.tuna.tsinghua.edu.cn/centos/$releasever/extras/$basearch/
#mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=extras
gpgcheck=1
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7
#additional packages that extend functionality of existing packages
#(disabled by default: enabled=0)
[centosplus]
name=CentOS-$releasever - Plus
baseurl=https://mirrors.tuna.tsinghua.edu.cn/centos/$releasever/centosplus/$basearch/
#mirrorlist=http://mirrorlist.centos.org/?release=$releasever&arch=$basearch&repo=centosplus
gpgcheck=1
enabled=0
gpgkey=file:///etc/pki/rpm-gpg/RPM-GPG-KEY-CentOS-7
清除缓存
yum clean all # purge all cached yum metadata and packages
yum makecache # rebuild the yum cache from the configured repos
配置清华epel源
安装epel-release
yum install epel-release
修改/etc/yum.repos.d/epel.repo,将mirrorlist和metalink开头的行注释掉。
接下来,取消注释这个文件里baseurl开头的行,并将其中的http://download.fedoraproject.org/pub替换成https://mirrors.tuna.tsinghua.edu.cn。
可以使用如下命令自动替换:
# Comment out both metalink= and mirrorlist= lines (as described above),
# enable the baseurl= lines, and point them at the TUNA mirror over https.
# -i edits the repo files in place.
sed -e 's!^metalink=!#metalink=!g' \
    -e 's!^mirrorlist=!#mirrorlist=!g' \
    -e 's!^#baseurl=!baseurl=!g' \
    -e 's!//download\.fedoraproject\.org/pub!//mirrors.tuna.tsinghua.edu.cn!g' \
    -e 's!http://mirrors!https://mirrors!g' \
    -i /etc/yum.repos.d/epel.repo /etc/yum.repos.d/epel-testing.repo
运行 yum update
测试一下吧
修改主机名
# Hostname must match the /etc/hosts entries used everywhere else in this
# document (master1, slave1, slave2) — "master" alone would not resolve.
hostnamectl set-hostname master1
新建用户
# Create user "hadoop" (-m also creates the home directory)
useradd -m hadoop
# Set the password for user "hadoop" (interactive prompt)
passwd hadoop
# Grant sudo to user "hadoop": edit /etc/sudoers (prefer `visudo` — it
# syntax-checks before saving) and add the line below
vim /etc/sudoers
hadoop ALL=(ALL) ALL
安装SSH
# Install the SSH daemon (openssl is pulled in as a dependency of openssh)
yum install -y openssl openssh-server
# Configure the daemon: /etc/ssh/sshd_config
vim /etc/ssh/sshd_config
PermitRootLogin no
PubkeyAuthentication yes
AuthorizedKeysFile .ssh/authorized_keys
# NOTE(review): only set "PasswordAuthentication no" AFTER the public keys
# below have been copied to every node — ssh-copy-id needs password login,
# and disabling it first will lock you out.
PasswordAuthentication no
# Start the daemon
systemctl start sshd.service
# Start on boot
systemctl enable sshd.service
# Key material: ~/.ssh must be 700, authorized_keys must be 600
mkdir -p ~/.ssh
chmod 700 ~/.ssh
ssh-keygen -t rsa -b 4096
touch ~/.ssh/authorized_keys
chmod 600 ~/.ssh/authorized_keys
# Append this host's public key to authorized_keys on every node.
# Hostnames must match /etc/hosts (master1, slave1, slave2 — the original
# devbox-01..03 names are not defined anywhere in this document).
ssh-copy-id -i ~/.ssh/id_rsa.pub master1
ssh-copy-id -i ~/.ssh/id_rsa.pub slave1
ssh-copy-id -i ~/.ssh/id_rsa.pub slave2
时钟同步
# Set the timezone to Asia/Shanghai (append to /etc/profile)
TZ='Asia/Shanghai'; export TZ
# Install the ntp daemon
yum install -y ntp
# On the master, add to /etc/ntp.conf: serve the local clock as the
# time source (stratum 10 keeps it a low-priority fallback)
server 127.127.1.0 # local clock
fudge 127.127.1.0 stratum 10
# On each slave: one-shot sync against the master
# NOTE(review): hostname "master" vs "master1" used elsewhere in this
# document — confirm the name actually resolves on the slaves.
ntpdate master
# Periodic sync: cron entry, every 10 minutes
crontab -e
*/10 * * * * /usr/sbin/ntpdate master
安装JDK
# Java 8 environment (append to /etc/profile); adjust to the installed JDK path
JAVA_HOME=/usr/java/jdk1.8.0_301
# Classic JDK 8 classpath: current directory plus the runtime and tools jars
CLASSPATH=.:${JAVA_HOME}/jre/lib/rt.jar:${JAVA_HOME}/lib/dt.jar:${JAVA_HOME}/lib/tools.jar
# One export statement can export several names: PATH (with bin appended),
# JAVA_HOME and CLASSPATH are all exported here
export PATH=$PATH:${JAVA_HOME}/bin JAVA_HOME CLASSPATH
关闭防火墙
# Show the firewall state
firewall-cmd --state
# Stop firewalld now
systemctl stop firewalld.service
# Keep firewalld from starting at boot
systemctl disable firewalld.service
配置hosts
192.168.144.1 master1
192.168.144.2 slave1
192.168.144.3 slave2
Hadoop普通集群部署
Hadoop配置
hadoop-env.sh
export JAVA_HOME=/usr/java/jdk1.8.0_301
export HADOOP_HOME=/usr/local/hadoop-3.3.1
core-site.xml
<configuration>
<!-- Default filesystem URI: the NameNode RPC endpoint -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://master1:9000</value>
</property>
<!-- Read/write buffer size in bytes (128 KiB) -->
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<!-- Base temp/working directory for Hadoop -->
<property>
<name>hadoop.tmp.dir</name>
<value>file:/home/hadoop/tmp</value>
</property>
<!-- Allow user "hadoop" to act as a proxy from any host, for any group -->
<property>
<name>hadoop.proxyuser.hadoop.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hadoop.groups</name>
<value>*</value>
</property>
</configuration>
hdfs-site.xml
<configuration>
<!-- NameNode metadata directory -->
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/home/hadoop/dfs/name</value>
</property>
<!-- DataNode block storage directory -->
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/home/hadoop/dfs/data</value>
</property>
<!-- Replication factor 2, matching the two datanodes (slave1, slave2) -->
<property>
<name>dfs.replication</name>
<value>2</value>
</property>
<!-- FIX: "dfs.permissions" is the obsolete pre-2.x key; in Hadoop 2.x/3.x
     the key is "dfs.permissions.enabled". Disables HDFS permission checks. -->
<property>
<name>dfs.permissions.enabled</name>
<value>false</value>
</property>
</configuration>
mapred-site.xml
<configuration>
<!-- Run MapReduce on YARN (the default would be the local runner) -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- NOTE(review): on Hadoop 3.x, submitting MR jobs may additionally require
     yarn.app.mapreduce.am.env / mapreduce.map.env / mapreduce.reduce.env set
     to HADOOP_MAPRED_HOME=${HADOOP_HOME} - confirm for your version -->
</configuration>
yarn-site.xml
<configuration>
<!-- Auxiliary shuffle service required for MapReduce jobs to run on YARN -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
</configuration>
slaves/workers
slave1
slave2
启动
# On the master node
# Format the NameNode (FIRST start only — reformatting wipes HDFS metadata)
hdfs namenode -format
# Start HDFS (NameNode here + the DataNodes listed in etc/hadoop/workers)
sbin/start-dfs.sh
# Or start the daemons individually. This document targets Hadoop 3.3.1,
# where hadoop-daemon.sh / hadoop-daemons.sh are deprecated in favor of:
bin/hdfs --daemon start namenode
bin/hdfs --daemon start datanode
# Start YARN
sbin/start-yarn.sh
# Or individually (yarn-daemon.sh / yarn-daemons.sh are likewise deprecated):
bin/yarn --daemon start resourcemanager
bin/yarn --daemon start nodemanager
bin/yarn --daemon start proxyserver
# Start the MapReduce job-history server (mr-jobhistory-daemon.sh is deprecated)
bin/mapred --daemon start historyserver
添加新节点
修改各节点slaves/workers、hosts文件
启动datanode、nodemanager
查看web节点是否添加成功
负载均衡:hdfs balancer
删除结点
修改NameNode节点的hdfs-site.xml
文件,添加以下内容:
<property>
<name>dfs.hosts.exclude</name>
<!-- Must be an absolute path: Hadoop does not substitute a {hadoop_home}
     placeholder. Uses the HADOOP_HOME configured earlier in this document. -->
<value>/usr/local/hadoop-3.3.1/etc/hadoop/dfs_exclude</value>
</property>
将要删除的DataNode节点添加到dfs_exclude文件中
在NameNode上执行
# Ask the NameNode to re-read its include/exclude files
hdfs dfsadmin -refreshNodes
# Check node state; wait until the node reports "Decommissioned"
hdfs dfsadmin -report
停止删除了的DataNode节点
删除DataNode数据
高可用集群部署
Zookeeper安装
# Edit the config file zoo.cfg
dataDir=/data/zookeeper
server.1=192.168.144.1:2888:3888
server.2=192.168.144.2:2888:3888
server.3=192.168.144.3:2888:3888
# Create the myid file under dataDir
# On each machine, myid must contain the number after "server." for that host
echo 1 > /data/zookeeper/myid
# Start the zookeeper service on every machine
bin/zkServer.sh start
# Check cluster status (expect one leader, the rest followers)
bin/zkServer.sh status
配置文件
修改 core-site.xml 文件
<configuration>
<!-- Default filesystem: the HA nameservice id (resolved via hdfs-site.xml) -->
<property>
<name>fs.defaultFS</name>
<value>hdfs://nn-cluster</value>
</property>
<!-- FIX: property name was misspelled "io.file.buffer.zise", so the
     setting was silently ignored. Read/write buffer size (128 KiB). -->
<property>
<name>io.file.buffer.size</name>
<value>131072</value>
</property>
<!-- Hadoop working/temp directory -->
<property>
<name>hadoop.tmp.dir</name>
<value>file:/data/hadoop</value>
</property>
<!-- ZooKeeper quorum used by ZKFC for automatic failover -->
<property>
<name>ha.zookeeper.quorum</name>
<value>master1:2181,master2:2181,slave1:2181</value>
</property>
<!-- Allow user "hadoop" to act as a proxy from any host, for any group -->
<property>
<name>hadoop.proxyuser.hadoop.hosts</name>
<value>*</value>
</property>
<property>
<name>hadoop.proxyuser.hadoop.groups</name>
<value>*</value>
</property>
</configuration>
修改 hdfs-site.xml
<configuration>
<!-- Replication factor: should not exceed the number of datanodes -->
<property>
<name>dfs.replication</name>
<value>3</value>
</property>
<!-- Logical nameservice id; must match fs.defaultFS in core-site.xml -->
<property>
<name>dfs.nameservices</name>
<value>nn-cluster</value>
</property>
<!-- nn-cluster has two namenodes: nn1 (master1) and nn2 (master2) -->
<property>
<name>dfs.ha.namenodes.nn-cluster</name>
<value>nn1,nn2</value>
</property>
<!-- RPC address of nn1 (master1) -->
<property>
<name>dfs.namenode.rpc-address.nn-cluster.nn1</name>
<value>master1:9000</value>
</property>
<!-- HTTP address of nn1. NOTE(review): 50070 is the Hadoop 2.x default web
     port; Hadoop 3.x defaults to 9870. An explicit value still works, but
     confirm it is intentional for 3.3.1. -->
<property>
<name>dfs.namenode.http-address.nn-cluster.nn1</name>
<value>master1:50070</value>
</property>
<!-- RPC address of nn2 (master2) -->
<property>
<name>dfs.namenode.rpc-address.nn-cluster.nn2</name>
<value>master2:9000</value>
</property>
<!-- HTTP address of nn2 -->
<property>
<name>dfs.namenode.http-address.nn-cluster.nn2</name>
<value>master2:50070</value>
</property>
<!-- Where the namenode edit log is written: the JournalNode quorum -->
<property>
<name>dfs.namenode.shared.edits.dir</name>
<value>qjournal://master1:8485;master2:8485;slave1:8485/nn-cluster</value>
</property>
<!-- Local disk path where each JournalNode stores its data -->
<property>
<name>dfs.journalnode.edits.dir</name>
<value>/data/hadoop/journaldata</value>
</property>
<!-- Enable automatic namenode failover (requires ZKFC + ZooKeeper) -->
<property>
<name>dfs.ha.automatic-failover.enabled</name>
<value>true</value>
</property>
<!-- Client-side proxy provider that routes requests to the active namenode -->
<property>
<name>dfs.client.failover.proxy.provider.nn-cluster</name>
<value>org.apache.hadoop.hdfs.server.namenode.ha.ConfiguredFailoverProxyProvider</value>
</property>
<!-- Fencing methods, one per line; shell(/bin/true) acts as a fallback so
     failover still proceeds when sshfence cannot reach the dead node -->
<property>
<name>dfs.ha.fencing.methods</name>
<value>
sshfence
shell(/bin/true)
</value>
</property>
<!-- sshfence requires passwordless ssh: private key used to reach the peer NN -->
<property>
<name>dfs.ha.fencing.ssh.private-key-files</name>
<value>/home/hadoop/.ssh/id_rsa</value>
</property>
<!-- sshfence connect timeout, in milliseconds (30 s) -->
<property>
<name>dfs.ha.fencing.ssh.connect-timeout</name>
<value>30000</value>
</property>
<!-- Local storage for namenode metadata and datanode blocks -->
<property>
<name>dfs.namenode.name.dir</name>
<value>file:/data/hadoop/dfs/name</value>
</property>
<property>
<name>dfs.datanode.data.dir</name>
<value>file:/data/hadoop/dfs/data</value>
</property>
</configuration>
修改 mapred-site.xml
<configuration>
<!-- Run MapReduce on the YARN framework -->
<property>
<name>mapreduce.framework.name</name>
<value>yarn</value>
</property>
<!-- MapReduce JobHistory server RPC address and port -->
<property>
<name>mapreduce.jobhistory.address</name>
<value>master1:10020</value>
</property>
<!-- MapReduce JobHistory server web UI address and port -->
<property>
<name>mapreduce.jobhistory.webapp.address</name>
<value>master1:19888</value>
</property>
</configuration>
修改 yarn-site.xml
<configuration>
<!-- Enable ResourceManager high availability -->
<property>
<name>yarn.resourcemanager.ha.enabled</name>
<value>true</value>
</property>
<!-- Cluster id shared by the RM pair -->
<property>
<name>yarn.resourcemanager.cluster-id</name>
<value>rm-cluster</value>
</property>
<!-- Logical ids of the two ResourceManagers -->
<property>
<name>yarn.resourcemanager.ha.rm-ids</name>
<value>rm1,rm2</value>
</property>
<!-- Hostname of each ResourceManager -->
<property>
<name>yarn.resourcemanager.hostname.rm1</name>
<value>master1</value>
</property>
<property>
<name>yarn.resourcemanager.hostname.rm2</name>
<value>master2</value>
</property>
<!-- ZooKeeper quorum for RM state store / leader election -->
<property>
<name>yarn.resourcemanager.zk-address</name>
<value>master1:2181,master2:2181,slave1:2181</value>
</property>
<!-- Auxiliary shuffle service required to run MapReduce jobs -->
<property>
<name>yarn.nodemanager.aux-services</name>
<value>mapreduce_shuffle</value>
</property>
<!-- Web UI address of each ResourceManager -->
<property>
<name>yarn.resourcemanager.webapp.address.rm1</name>
<value>master1:8088</value>
</property>
<property>
<name>yarn.resourcemanager.webapp.address.rm2</name>
<value>master2:8088</value>
</property>
</configuration>
启动
在master1上格式化namenode(格式化之前要先在各journalnode节点上执行 hdfs --daemon start journalnode 启动journalnode集群)
hdfs namenode -format
复制master1上的namenode文件夹到master2
scp -r /data/hadoop/dfs/name hadoop@master2:/data/hadoop/dfs/
也可以在master2上执行
hdfs namenode -bootstrapStandby
格式化ZKFC(任意一个nn节点上执行)
hdfs zkfc -formatZK
启动hdfs:start-dfs.sh 脚本会依次启动 namenode、datanode、journalnode、zkfc
start-dfs.sh
启动yarn
start-yarn.sh
# On master2, start the standby ResourceManager by hand.
# FIX: the script name was misspelled "yarn-doeman.sh"; on Hadoop 3.x
# prefer: yarn --daemon start resourcemanager
yarn-daemon.sh start resourcemanager
启动 mapreduce 任务历史服务器
mr-jobhistory-daemon.sh start historyserver