一、说明
本文章介绍的是怎么使用Keepalived配置PostgreSQL高可用集群, 需要读者对这两个软件有一定的了解。配置好以后集群自动切换主从,从而实现pg的高可用。
二、环境
这里的ip 不存在
IP | 虚拟ip | 版本 | |
master | 192.168.20.11 | 192.168.20.13 | pg 13.14 keepalived |
slave | 192.168.20.12 | pg 13.14 keepalived |
三、安装postgresql并配置
1、安装postgresql 数据库
读者可按照自己的方式安装postgresql数据库,离线或者yum安装都可
2、修改配置
安装完数据库以后进行初始化
#初始化数据库 $DATA_DIR为配置文件路径
initdb -D $DATA_DIR
修改配置文件vi postgresql.conf 添加一下配置
wal_level = replica
max_wal_senders= 10
wal_keep_size = 512
wal_sender_timeout = 60s
max_connections = 5100
hot_standby = on
max_standby_streaming_delay = 30s
wal_receiver_status_interval = 10s
#这里的host 的值为主备互相,如果是11服务器就填写192.168.20.12 ,如果是12服务器就填写192.168.20.11
primary_conninfo = 'host=$节点IP port=17306 user=replicator password=replicator'
recovery_target_timeline = 'latest'
vi pg_hba.conf添加replicator用户信息
host replication replicator 0.0.0.0/0 trust
四、安装并配置keepalived
1、安装keepalived
读者自行安装keepalived
2、配置keeplived
配置etc/keeplived/keepalived-master.conf 和 etc/keeplived/keepalived-backup.conf
master
vrrp_script check_pg_alived {
script <安装位置>/master-scripts/pg_check.sh"
interval 1
}
vrrp_instance VI_3 {
state MASTER
interface ${网卡地址} #网卡地址,linux查看ip addr
virtual_router_id 52 #此处id可以随意定但是要保持一致
priority 100
advert_int 1
authentication {
auth_type PASS
auth_pass 123456
}
track_script {
check_pg_alived
}
virtual_ipaddress {
192.168.20.13 #VIP地址
}
notify_master <安装位置>/master-scripts/pg_master.sh
notify_backup <安装位置>/master-scripts/pg_backup.sh
notify_fault <安装位置>/master-scripts/pg_fault.sh
}
backup
vrrp_script check_pg_alived {
script <安装位置>/backup-scripts/pg_check.sh"
interval 1
}
vrrp_instance VI_3 {
state BACKUP
interface ${网卡地址} #网卡地址,linux查看ip addr
virtual_router_id 52 #此处id可以随意定但是要保持一致
priority 99
advert_int 1
authentication {
auth_type PASS
auth_pass 123456
}
track_script {
check_pg_alived
}
virtual_ipaddress {
192.168.20.13 #VIP地址
}
notify_master <安装位置>/backup-scripts/pg_master.sh
notify_backup <安装位置>/backup-scripts/pg_backup.sh
notify_fault <安装位置>/backup-scripts/pg_fault.sh
}
编写 pg_check.sh pg_master.sh pg_backup.sh pg_fault.sh脚本
vi pg_check.sh
#!/bin/bash
#PG is alive or not?
A=`ps -ef | grep postgres | grep "./bin/postgres" |grep -v grep| wc -l`
if [ $A -eq 0 ];then
exit 1
else
exit 0
fi
vi pg_master.sh
#!/bin/bash
#从slave 变为master的操作
LOGFILE=日志目录
export DATA_DIR=pg数据库的配置文件目录
export MASTER_HOST=192.168.20.11
export SLAVE_HOST=192.168.20.12
export PGDATABASE=postgres
export PGPORT=${PGPORT}
export PGUSER=pg数据的所属用户
export PGBIN=pg数据库的bin目录
#keepalived使用的配置文件
HOST_IF=$( sudo ip route|grep default|head -n1|cut -d' ' -f5)
LOCAL_IP=$( sudo ip a|grep "$HOST_IF$"|head -n1|awk '{print $2}'|cut -d'/' -f1)
if [ "$LOCAL_IP" = "$MASTER_HOST" ]; then
export file=安装目录/keepalived-master.conf
export PGMASTER_HOST=${SLAVE_HOST}
else
export file=安装目录/keepalived-backup.conf
export PGMASTER_HOST=${MASTER_HOST}
fi
#当pg主节点发生故障的时候切换
#如果当前节点是从节点则升为主节点
SQL1='SELECT pg_is_in_recovery from pg_is_in_recovery();'
db_role=`echo $SQL1 | ${PGBIN}/psql -d $PGDATABASE -U $PGUSER -h 127.0.0.1 -p $PGPORT -At -w`
if [ $db_role == 't' ];then
echo -e `date +"%F %T"` "the current database is standby DB! [切换至master角色] " >> $LOGFILE
su - ${PGUSER} -c "$PGBIN/pg_ctl promote -D $DATA_DIR"
else
echo -e `date +"%F %T"` "the current database is master DB!" >> $LOGFILE
fi
#修改配置并重新加载
sed -i '/vrrp_instance VI_3 {/{n;s/.*/ state MASTER/}' "$file"
awk '{
if (found && count == 4) {
print " priority 100"
found = 0
} else {
print
}
}
/vrrp_instance VI_3 {/ {
found = 1
count = 0
}
found {
count++
}' "$file" > "$file.tmp" && mv "$file.tmp" "$file"
chown ${PGUSER}:${PGUSER} $file
#重新加载配置文件
pid=`ps -ef | grep keepalived | grep -v grep | awk '{print $2}' | head -n 1`
kill -SIGHUP $pid
vi pg_backup.sh
#!/bin/bash
#从master 变为slave的操作
LOGFILE=日志目录
export DATA_DIR=pg数据库的配置文件目录
export MASTER_HOST=192.168.20.11
export SLAVE_HOST=192.168.20.12
export PGDATABASE=postgres
export PGPORT=${PGPORT}
export PGUSER=pg数据的所属用户
export PGBIN=pg数据库的bin目录
#keepalived使用的配置文件
HOST_IF=$( sudo ip route|grep default|head -n1|cut -d' ' -f5)
LOCAL_IP=$( sudo ip a|grep "$HOST_IF$"|head -n1|awk '{print $2}'|cut -d'/' -f1)
if [ "$LOCAL_IP" = "$MASTER_HOST" ]; then
export file=安装目录/keepalived-master.conf
export PGMASTER_HOST=${SLAVE_HOST}
else
export file=安装目录/keepalived-backup.conf
export PGMASTER_HOST=${MASTER_HOST}
fi
#修改配置并重新加载
sed -i '/vrrp_instance VI_3 {/{n;s/.*/ state BACKUP/}' "$file"
awk '{
if (found && count == 4) {
print " priority 99"
found = 0
} else {
print
}
}
/vrrp_instance VI_3 {/ {
found = 1
count = 0
}
found {
count++
}' "$file" > "$file.tmp" && mv "$file.tmp" "$file"
#重新加载配置文件
pid=`ps -ef | grep keepalived | grep -v grep | awk '{print $2}' | head -n 1`
kill -SIGHUP $pid
chown pangu:pangu $file
vi pg_fault.sh
#!/bin/bash
#从master 变为slave的操作
LOGFILE=日志目录
export DATA_DIR=pg数据库的配置文件目录
export MASTER_HOST=192.168.20.11
export SLAVE_HOST=192.168.20.12
export PGDATABASE=postgres
export PGPORT=${PGPORT}
export PGUSER=pg数据的所属用户
export PGBIN=pg数据库的bin目录
HOST_IF=$( sudo ip route|grep default|head -n1|cut -d' ' -f5)
LOCAL_IP=$( sudo ip a|grep "$HOST_IF$"|head -n1|awk '{print $2}'|cut -d'/' -f1)
if [ "$LOCAL_IP" = "$MASTER_HOST" ]; then
export DB_HOST=${SLAVE_HOST}
else
export DB_HOST=${MASTER_HOST}
fi
function check_database_connection() {
# 使用psql命令尝试连接远程数据库
$PGBIN/psql -h "$DB_HOST" -p "$PGPORT" -d "$PGDATABASE" -U "$PGUSER" -w -c "SELECT version();" &> /dev/null
if [ $? -eq 0 ]; then
touch ${DATA_DIR}/standby.signal
chown ${PGUSER}:${PGUSER} ${DATA_DIR}/standby.signal
chmod 700 ${DATA_DIR}/standby.signal
fi
}
#故障检车脚本
num=`ps -ef | grep postgres | grep "./bin/postgres" |grep -v grep| wc -l`
if [ $num -ge 0 ];then
echo -e `date +"%F %T"` "数据库down机" >> $LOGFILE
#判断当前机器是否是最后一个down机的如果不是则执行下面的脚本
check_database_connection
fi
五、启动验证
启动posgresql和keepalived 注意postgresql要先启动主节点,再启动从节点
1、关闭pg主节点观察主从是否互换,虚拟ip是否自动漂移,启动停掉的主节点观察是否正常
2、关闭pg从节点,观察服务是否正常,然后启动从节点观察是否正常
本篇博文可能有很多表达不清楚,或者写的不够清楚的地方,如果读者有什么问题咨询可以留言,本作者看到以后将第一时间回复