本文是基于CentOS 7及Openstack juno版本的高可用实践。
高可用组件安装配置
准备工作
首先确保两台机器时间同步,配置ssh。
安装组件
添加yum源,这个源包含了crmsh、resource-agents等包:
[haclustering]
name=HA Clustering
baseurl=http://download.opensuse.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-7/
enabled=1
gpgcheck=0
安装相关组件:
#yum install pacemaker corosync resource-agents crmsh pcs
安装DRBD
方法一通过yum源安装:
# rpm --import http://elrepo.org/RPM-GPG-KEY-elrepo.org
# rpm -Uvh http://www.elrepo.org/elrepo-release-7.0-2.el7.elrepo.noarch.rpm
# yum -y install drbd84-utils kmod-drbd84
方法二编译安装:
#yum install docbook-style-xsl #编译drbd时候用到
#mkdir -p /tmp/drbdinst
#/usr/bin/wget --directory-prefix=/tmp/drbdinst/ http://oss.linbit.com/drbd/8.4/drbd-8.4.5.tar.gz
#cd /tmp/drbdinst
#tar -zxpf drbd-8.4.5.tar.gz
#cd drbd-8.4.5
#/usr/bin/yum -y install flex gcc make
#make
#make install
#/usr/bin/yum -y install libxslt
#/usr/bin/wget --directory-prefix=/tmp/drbdinst/ http://oss.linbit.com/drbd/drbd-utils-8.9.1.tar.gz
#cd /tmp/drbdinst
#tar -zxpf drbd-utils-8.9.1.tar.gz
#cd drbd-utils-8.9.1
#./configure --prefix=/usr --localstatedir=/var --sysconfdir=/etc
#make
#make install
#cp /lib/udev/65-drbd.rules /lib/udev/rules.d/
#/bin/rm -rf /tmp/drbdinst
配置
Corosync配置
从/etc/corosync/corosync.conf.example.udpu复制sample文件到/etc/corosync/corosync.conf,根据实际配置编辑:
compatibility: whitetank
service {
ver: 1
name: pacemaker
use_logd: yes
}
logging {
fileline: off
to_logfile: yes
logfile: /var/log/cluster/corosync.log
to_stderr: no
debug: off
timestamp: on
to_syslog: yes
logger_subsys {
subsys: QUORUM
debug: off
}
}
totem {
version: 2
token: 3000
secauth: on
rrp_mode: active
interface {
ringnumber: 0
bindnetaddr: 10.0.0.0
mcastaddr: 226.94.1.1
mcastport: 5405
}
}
quorum {
provider: corosync_votequorum
expected_votes: 2
}
如果secauth设置为on,则需要生成一个加密key用于集群通信:
# corosync-keygen
生成完成后,复制到其他节点:
# scp -p /etc/corosync/authkey controllerv:/etc/corosync/
在所有节点上启动服务:
# systemctl start corosync pacemaker
查看配置、membership及quorum API
# corosync-cfgtool -s
# corosync-cmapctl | grep members
# corosync-quorumtool -l 或pcs status corosync
Pacemaker配置
首先验证pacemaker安装:
# ps axf
49091 ? Ss 0:00 /usr/sbin/pacemakerd -f
49092 ? Ss 0:00 \_ /usr/libexec/pacemaker/cib
49093 ? Ss 0:00 \_ /usr/libexec/pacemaker/stonithd
49094 ? Ss 0:00 \_ /usr/libexec/pacemaker/lrmd
49095 ? Ss 0:00 \_ /usr/libexec/pacemaker/attrd
49096 ? Ss 0:00 \_ /usr/libexec/pacemaker/pengine
49097 ? Ss 0:00 \_ /usr/libexec/pacemaker/crmd
检查集群状态:
# crm status
Last updated: Tue Dec 2 23:04:29 2014
Last change: Tue Dec 2 22:54:01 2014 via crmd on node1
Stack: corosync
Current DC: NONE
2 Nodes configured
0 Resources configured
Online: [ controller controllerv ]
查看配置:
# crm configure show
node 167772171: controller
node 167772172: controllerv
property cib-bootstrap-options: \
dc-version=1.1.10-32.el7_0.1-368c726 \
cluster-infrastructure=corosync
使用cibadmin --query --local或pcs cluster cib可查看xml格式的cib信息。
如下命令验证配置,可以发现问题:
# crm_verify -L -V
error: unpack_resources: Resource start-up disabled since no STONITH resources have been defined
error: unpack_resources: Either configure some or disable STONITH with the stonith-enabled option
error: unpack_resources: NOTE: Clusters with shared data need STONITH to ensure data integrity
Errors found during check: config not valid
因为测试环境没有STONITH设备,所以禁用STONITH:(STONITH会在另一篇博客中做详细介绍)
# crm configure property stonith-enabled=false
双节点忽略quorum:
# crm configure property no-quorum-policy=ignore
修改后查看配置并验证:
# crm configure show
node 167772171: controller
node 167772172: controllerv
property cib-bootstrap-options: \
dc-version=1.1.10-32.el7_0.1-368c726 \
cluster-infrastructure=corosync \
stonith-enabled=false \
no-quorum-policy=ignore
# crm_verify -L
#
配置DRBD
MARIADB
MySQL/DRBD/Pacemaker/Corosync Stack
编辑/etc/drbd.conf
# You can find an example in /usr/share/doc/drbd.../drbd.conf.example
include "drbd.d/global_common.conf";
include "drbd.d/*.res";
使用global_common.conf,后面还需要配置rabbitmq。
global {
usage-count no;
}
common {
protocol C; # C 同步 A 异步 B 半同步
handlers {
pri-on-incon-degr "/usr/lib/drbd/notify-pri-on-incon-degr.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
pri-lost-after-sb "/usr/lib/drbd/notify-pri-lost-after-sb.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
local-io-error "/usr/lib/drbd/notify-io-error.sh; /usr/lib/drbd/notify-emergency-shutdown.sh; echo o > /proc/sysrq-trigger ; halt -f";
}
startup {
wfc-timeout 30; #启动时等待DRBD资源连接的超时时间(秒)
degr-wfc-timeout 30; #节点处于degraded cluster状态时的等待超时时间(秒)
}
disk {
on-io-error detach;
fencing resource-only;
}
net {
#节点间通信使用的消息认证(HMAC摘要)算法
cram-hmac-alg "sha1";
shared-secret "mydrbd";
}
syncer {
rate 100M; #设置同步的最大网速
}
}
新建/etc/drbd.d/mariadb.res
resource mariadb {
device /dev/drbd0;
disk /dev/mapper/data_vg-mariadb;
meta-disk internal; #此处根据实际情况设置,下面有具体说明
on controller {
address 10.0.0.11:7789;
}
on controllerv {
address 10.0.0.12:7789;
}
}
注意:关于DRBD元数据的一些注意事项请参考我另一篇博客。
因为我的数据已经存在了,所以为防止数据丢失,先进行备份。
# dd if=/dev/data_vg/mariadb of=/root/back bs=1M count=150
在主节点上,先将对应文件系统umount,然后进行元数据设置。
# drbdadm create-md mariadb
md_offset 1077932032
al_offset 1077899264
bm_offset 1077862400
Found ext2 filesystem