1. Prerequisites
Prepare two servers with identical configurations.
2. Install DRBD
[root@server139 ~]# yum -y update kernel kernel-devel
[root@server139 ~]# rpm -Uvh http://www.elrepo.org/elrepo-release-7.0-2.el7.elrepo.noarch.rpm
[root@server139 ~]# yum -y install drbd84-utils kmod-drbd84
Note: reboot after the kernel update so that the running kernel matches the new kernel-devel, otherwise the kmod-drbd84 module may fail to load. Install on both nodes.
3. Create a new partition
[root@server139 ~]# fdisk -l
[root@server139 ~]# fdisk /dev/sdb
n    # create a new partition
w    # write the partition table and exit
Note: the new partitions on the two machines must be the same size.
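Note that the resource file in step 4 points DRBD at /dev/nfsdisk/nfsvolume, which is an LVM path (volume group nfsdisk, logical volume nfsvolume) rather than the raw partition. A minimal sketch to create it on top of the new partition, assuming fdisk produced /dev/sdb1 — run on both nodes:
[root@server139 ~]# pvcreate /dev/sdb1
[root@server139 ~]# vgcreate nfsdisk /dev/sdb1
[root@server139 ~]# lvcreate -n nfsvolume -l 100%FREE nfsdisk
[root@server139 ~]# lvs nfsdisk //verify the logical volume exists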
4. Modify the DRBD configuration
[root@server139 ~]# vim /etc/drbd.conf
#include "drbd.d/global_common.conf"; # comment out this line to avoid conflicts with the configuration we write ourselves
include "drbd.d/*.res";
include "drbd.d/*.cfg"; # add this line so that our drbd_basic.cfg below is actually loaded
[root@server139 ~]# vim /etc/drbd.d/drbd_basic.cfg
global {
    usage-count yes; # whether to participate in DRBD usage statistics; the default is yes, and either value is fine
}
common {
    syncer { rate 100M; } # maximum network rate for primary/secondary synchronization; the default unit is bytes, so we specify it in megabytes
}
resource r0 { # r0 is the resource name; we use it when initializing the disk
    protocol C; # use protocol C (fully synchronous replication)
    handlers {
        pri-on-incon-degr "echo o > /proc/sysrq-trigger ; halt -f";
        pri-lost-after-sb "echo o > /proc/sysrq-trigger ; halt -f";
        local-io-error "echo o > /proc/sysrq-trigger ; halt -f";
        fence-peer "/usr/lib/heartbeat/drbd-peer-outdater -t 5";
        pri-lost "echo pri-lost. Have a look at the log file. | mail -s 'DRBD Alert' root";
        split-brain "/usr/lib/drbd/notify-split-brain.sh root";
        out-of-sync "/usr/lib/drbd/notify-out-of-sync.sh root";
    }
    net {
        cram-hmac-alg "sha1";
        shared-secret "NFS-HA";
        # authentication method and shared secret used for DRBD synchronization
    }
    disk {
        on-io-error detach;
        fencing resource-only;
    }
    startup {
        wfc-timeout 120;
        degr-wfc-timeout 120;
    }
    device /dev/drbd0; # /dev/drbd0 is the device name users mount; it is created by the DRBD driver
    on server139 { # each host entry starts with "on" followed by its hostname (must be resolvable via /etc/hosts)
        disk /dev/nfsdisk/nfsvolume; # backing disk for /dev/drbd0; this is the volume created earlier — it must exist on both nodes or later steps will fail
        address 192.168.37.139:7788; # DRBD listening address and port, used to communicate with the other host
        meta-disk internal; # where drbd stores its metadata
    }
    on server140 {
        disk /dev/nfsdisk/nfsvolume;
        address 192.168.37.140:7788;
        meta-disk internal;
    }
}
Note: the DRBD configuration must be identical on both machines, and /etc/hosts must be set up correctly.
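For reference, the /etc/hosts entries that make both hostnames resolvable (add on both nodes), followed by a syntax check of the resource file:
[root@server139 ~]# cat >> /etc/hosts << 'EOF'
192.168.37.139 server139
192.168.37.140 server140
EOF
[root@server139 ~]# drbdadm dump r0 //parses the configuration and prints the resource if the syntax is valid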
5. Start DRBD
[root@server139 ~]# service firewalld stop //stop the firewall temporarily
[root@server139 ~]# setenforce 0 //disable SELinux temporarily
[root@server139 ~]# systemctl disable firewalld //keep the firewall off across reboots
[root@server139 ~]# vim /etc/sysconfig/selinux //disable SELinux permanently: set SELINUX=disabled
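If you prefer not to disable the firewall entirely, a sketch that instead opens just what this setup uses (DRBD on TCP 7788, VRRP traffic for keepalived, and the NFS services):
[root@server139 ~]# firewall-cmd --permanent --add-port=7788/tcp
[root@server139 ~]# firewall-cmd --permanent --add-rich-rule='rule protocol value="vrrp" accept'
[root@server139 ~]# firewall-cmd --permanent --add-service=nfs --add-service=rpc-bind --add-service=mountd
[root@server139 ~]# firewall-cmd --reload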
First, run the following on both the primary and the secondary node:
[root@server139 ~]# drbdadm create-md r0 //create the metadata
[root@server139 ~]# service drbd start //start the drbd service
[root@server139 ~]# service drbd status //check the drbd service status
[root@server139 ~]# cat /proc/drbd //inspect the replication state
Then, on the primary node only:
[root@server139 ~]# drbdadm primary r0 //promote to primary; the very first promotion may need --force (drbdadm primary r0 --force)
[root@server139 ~]# mkfs.ext4 /dev/drbd0 //the device must be formatted before first use
[root@server139 ~]# mount /dev/drbd0 /data //mount on /data (create the directory first with mkdir -p /data); unmount later with umount /dev/drbd0
[root@server139 ~]# cat /proc/drbd //confirm this node now shows Primary
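The first promotion triggers a full sync of the device, which can take a while; two ways to watch the progress (drbd-overview ships with drbd84-utils):
[root@server139 ~]# watch -n1 cat /proc/drbd
[root@server139 ~]# drbd-overview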
6. Manual DRBD failover test
On the primary node:
[root@server139 ~]# ls /data //list the current contents
[root@server139 ~]# touch /data/test.txt //create a test file
[root@server139 ~]# umount /dev/drbd0 //unmount the device
[root@server139 ~]# service drbd stop //stop the drbd service
On the secondary node:
[root@server140 ~]# drbdadm primary r0 //promote to primary (add --force if the promotion is refused)
[root@server140 ~]# mount /dev/drbd0 /data //mount the device on /data
[root@server140 ~]# ls /data //check that the contents match what was seen on the primary
Note: remember to restore the original roles after the test.
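A minimal sketch of the restore, reversing the steps above (demote server140, then bring server139 back up as primary):
[root@server140 ~]# umount /dev/drbd0
[root@server140 ~]# drbdadm secondary r0
[root@server139 ~]# service drbd start
[root@server139 ~]# drbdadm primary r0
[root@server139 ~]# mount /dev/drbd0 /data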
7. Install the NFS service
[root@server139 ~]# yum install rpcbind nfs-utils
[root@server139 ~]# vim /etc/exports
/data 192.168.37.0/24(rw,sync,no_root_squash)
[root@server139 ~]# service rpcbind start
[root@server139 ~]# service nfs start
Note: install and configure NFS the same way on both the primary and the secondary node.
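To confirm the export is active on each node (both tools ship with nfs-utils):
[root@server139 ~]# exportfs -rv //re-read /etc/exports and list what is exported
[root@server139 ~]# showmount -e localhost //list the exports as a client would see them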
8. Install Keepalived
[root@server139 ~]# yum install -y keepalived
[root@server139 ~]# mkdir /etc/keepalived/logs
[root@server139 ~]# vim /etc/keepalived/check_nfs.sh
#!/bin/sh
### check NFS availability: the service status, and whether it can recover
/sbin/service nfs status &>/dev/null
if [ $? -ne 0 ];then
    ### if the service is unhealthy, first try restarting it
    /sbin/service nfs restart
    /sbin/service nfs status &>/dev/null
    if [ $? -ne 0 ];then
        ### if nfs is still unhealthy after the restart:
        ### unmount the drbd device
        umount /dev/drbd0
        ### demote the drbd primary to secondary
        drbdadm secondary r0
        ### stop keepalived so the VIP moves to the peer
        /sbin/service keepalived stop
    fi
fi
[root@server139 ~]# vim /etc/keepalived/check_network.sh
#!/bin/bash
# ping the VIP; if it is still unreachable after a retry, take over as master
ping -c 3 192.168.37.100 > /dev/null 2>&1
if [ $? -eq 0 ];then
    echo ok
else
    echo retry.....
    ping -c 3 192.168.37.100 > /dev/null 2>&1
    if [ $? -eq 0 ];then
        echo ok
    else
        echo err
        /etc/keepalived/notify_master.sh
    fi
fi
[root@server139 ~]# vim /etc/keepalived/notify_master.sh
#!/bin/bash
time=`date "+%F %H:%M:%S"`
echo -e "$time ------notify_master------\n" >> /etc/keepalived/logs/notify_master.log
/sbin/drbdadm primary r0 --force &>> /etc/keepalived/logs/notify_master.log
/bin/mount /dev/drbd0 /data &>> /etc/keepalived/logs/notify_master.log
/sbin/service nfs restart &>> /etc/keepalived/logs/notify_master.log
echo -e "\n" >> /etc/keepalived/logs/notify_master.log
[root@server139 ~]# vim /etc/keepalived/notify_backup.sh
#!/bin/bash
time=`date "+%F %H:%M:%S"`
echo -e "$time ------notify_backup------\n" >> /etc/keepalived/logs/notify_backup.log
/sbin/service nfs stop &>> /etc/keepalived/logs/notify_backup.log
/bin/umount /dev/drbd0 &>> /etc/keepalived/logs/notify_backup.log
/sbin/drbdadm secondary r0 &>> /etc/keepalived/logs/notify_backup.log
echo -e "\n" >> /etc/keepalived/logs/notify_backup.log
[root@server139 ~]# vim /etc/keepalived/notify_stop.sh
#!/bin/bash
time=`date "+%F %H:%M:%S"`
echo -e "$time ------notify_stop------\n" >> /etc/keepalived/logs/notify_stop.log
/sbin/service nfs stop &>> /etc/keepalived/logs/notify_stop.log
/bin/umount /data &>> /etc/keepalived/logs/notify_stop.log
/sbin/drbdadm secondary r0 &>> /etc/keepalived/logs/notify_stop.log
echo -e "\n" >> /etc/keepalived/logs/notify_stop.log
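Keepalived invokes these scripts directly, so they must be executable on both nodes:
[root@server139 ~]# chmod +x /etc/keepalived/*.sh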
Configure keepalived.conf on the primary node:
[root@server139 ~]# vim /etc/keepalived/keepalived.conf
! Configuration File for keepalived
global_defs {
    router_id DRBD_HA_MASTER
}
vrrp_script chk_nfs {
    script "/etc/keepalived/check_nfs.sh"
    interval 5
}
vrrp_instance VI_1 {
    state MASTER
    interface ens33
    virtual_router_id 51
    priority 100
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    track_script {
        chk_nfs
    }
    notify_stop /etc/keepalived/notify_stop.sh
    notify_master /etc/keepalived/notify_master.sh
    virtual_ipaddress {
        192.168.37.100/24
    }
}
Configure keepalived.conf on the secondary node:
[root@server140 ~]# vim /etc/keepalived/keepalived.conf
global_defs {
    router_id DRBD_HA_BACKUP
}
vrrp_script chk_net {
    script "/etc/keepalived/check_network.sh"
    interval 5
}
vrrp_instance VI_1 {
    state BACKUP
    interface ens33
    virtual_router_id 51
    priority 90
    advert_int 1
    authentication {
        auth_type PASS
        auth_pass 1111
    }
    track_script {
        chk_net
    }
    notify_master /etc/keepalived/notify_master.sh
    notify_backup /etc/keepalived/notify_backup.sh
    virtual_ipaddress {
        192.168.37.100/24
    }
}
[root@server139 ~]# service keepalived start //start the keepalived service
[root@server139 ~]# service keepalived status //check the keepalived service status
Note: Keepalived must be installed, configured, and started on both nodes.
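After both nodes are running, the VIP should be held by the master; a quick check:
[root@server139 ~]# ip addr show ens33 | grep 192.168.37.100 //prints the VIP line only on the current master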
9. Mount the NFS share on a client
The client needs the rpcbind and nfs-utils packages installed, with the rpcbind service running:
[root@server141 ~]# yum install rpcbind nfs-utils
[root@server141 ~]# service rpcbind start
[root@server141 ~]# mkdir -p /data //create the mount point first
[root@server141 ~]# mount -t nfs 192.168.37.100:/data /data
[root@server141 ~]# ls /data
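If the client should remount the share after a reboot, a sketch of an /etc/fstab entry (the _netdev option delays mounting until the network is up):
192.168.37.100:/data  /data  nfs  defaults,_netdev  0  0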
10. Simulated automatic failover tests
1. First, stop the keepalived service on the primary host. The VIP then moves to the secondary host; at the same time, nfs on the primary is shut down (by notify_stop.sh) and the secondary host is promoted to DRBD primary.
[root@server139 ~]# service keepalived stop
[root@server139 ~]# ip address //the VIP is gone from ens33
[root@server139 ~]# ps -ef|grep nfs //nfs has been stopped
[root@server139 ~]# df -h //drbd0 is no longer mounted
The VIP handover is also recorded in the system log:
[root@server139 ~]# tail -1000 /var/log/messages
Log in to the secondary host to confirm the VIP has moved over:
[root@server140 ~]# ip address
[root@server140 ~]# df -h
2. When the keepalived service on the primary host is started again, it forcibly reclaims the VIP (state MASTER with the higher priority preempts; see /var/log/messages), and the primary host becomes the DRBD primary again.
3. Stop the nfs service on the primary host. The monitor script first tries to restart nfs; only when that restart fails does it demote the DRBD primary to secondary and stop keepalived, producing the same failover as in test 1.
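To exercise test 3 by hand, stop nfs on the master and confirm that check_nfs.sh (run by keepalived every 5 seconds) brings it back:
[root@server139 ~]# service nfs stop
[root@server139 ~]# sleep 6; service nfs status //should be running again, restarted by check_nfs.sh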
11. Continuous client read/write tests
1. On the nfs client, use dd to write 5000 1 MB files into the mount point (/data is the client mount point). While the loop is running, stop keepalived on the master and check whether the writes are interrupted. In our test the writes did not stop, though there was a brief delay during the switchover.
[root@server141 ~]# for i in {1..5000};do dd if=/dev/zero of=/data/$i.file bs=1M count=1;done
2. Copy files from another directory on the client into the mount point, and start keepalived on the original master mid-copy (the VIP automatically switches back to the master, producing one failover). In our test the copy was not interrupted and the copied files were unaffected by the switchover, though again with a brief delay.
[root@server141 ~]# time cp -av /usr/* /data
3. Copy a large file from the client mount point to another directory, stop keepalived on the master during the copy, and verify the file's integrity with md5sum.
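A minimal sketch of test 3; bigfile.iso is a hypothetical stand-in for any large file already on the share:
[root@server141 ~]# cp /data/bigfile.iso /tmp/ & //start the copy in the background
[root@server139 ~]# service keepalived stop //trigger the failover mid-copy (on the master)
[root@server141 ~]# wait; md5sum /data/bigfile.iso /tmp/bigfile.iso //the two checksums should match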
Reference: https://cloud.tencent.com/developer/article/1026286