背景
网上postgres的高可用笔记都是基于postgres11版本甚至更低的版本部署,而postgres12主从改动较大,网上笔记基本都不适用,所以总结一篇,让大家参考一下。
基于流复制部署,keepalived
检测postgresql
状态,如果pgsql
死了,那么keepalived
的VIP
漂移到备库,同时提升备库为主库,可以进行读写操作。(默认流复制的主库为可读可写,备库为只读)
一.服务初始化
系统版本:CentOS Linux release 7.8.2003 (Core)
内核版本:Linux pg2 3.10.0-1127.19.1.el7.x86_64
数据库版本:postgresql-12.2
keepalived版本:keepalived-1.3.5-19.el7.x86_64
172.16.77.171 pg1
172.16.77.172 pg2
172.16.77.202 vip
两个服务器都执行
#关闭防火墙
sed -i '/SELINUX/s/enforcing/disabled/' /etc/selinux/config & setenforce 0&& systemctl disable firewalld.service && systemctl stop firewalld.service
#安装基本工具
yum install gcc gcc-c++ vim-enhanced glibc make unzip openssl openssl-devel openssh-server openssh-clients wget -y
[root@pg2 postgres]# cat /etc/hosts
127.0.0.1 localhost localhost.localdomain localhost4 localhost4.localdomain4
::1 localhost localhost.localdomain localhost6 localhost6.localdomain6
172.16.77.171 pg1
172.16.77.172 pg2
二.Postgres主从
两台机器都做
useradd postgres
passwd postgres
yum install readline-devel
tar -zxvf postgresql-12.2.tar.gz
cd /home/postgres/postgresql-12.2
#指定安装路径(configure配置)
./configure --prefix=/usr/local/postgresql
make
make install
chown -R postgres:postgres /usr/local/postgresql/
chown -R postgres:postgres /home/postgres
#设置环境变量
su – postgres
vi .bash_profile
PGHOME=/usr/local/postgresql
export PGHOME
PGDATA=/usr/local/postgresql/data
export PGDATA
PATH=$PATH:$HOME/.local/bin:$HOME/bin:$PGHOME/bin
export PATH
source ~/.bash_profile
psql -V
对pg1服务器初始化,也就是下面操作只需要pg1做
cd /usr/local/postgresql/
由于之前我们刚刚设置了PGDATA环境变量,所以此处我们也就无需再额外指定,最后执行初始化命令即可:
initdb
看到如下信息就说明初始化成功了,
#用postgres用户创建相关文件夹
mkdir /usr/local/postgresql/log
mkdir /home/postgres/arch
#编辑主库配置文件
cd /usr/local/postgresql/data/
vim /usr/local/postgresql/data/postgresql.conf
listen_addresses = '*'
wal_level = replica
max_wal_senders = 20
wal_keep_segments = 64
max_connections = 100
archive_mode = no
archive_command = 'test ! -f /home/postgres/arch/%f && cp %p /home/postgres/arch/%f'
#编辑访问控制文件
vi /usr/local/postgresql/data/pg_hba.conf 添加如下配置
host all all 172.16.77.0/24 trust
host all all 192.168.0.0/16 trust
host replication repl 172.16.77.0/24 trust
#启动数据库
pg_ctl start -l /usr/local/postgresql/log/pg_server.log
psql 进入
#创建一个同步用户
create role repl login replication encrypted password 'oracle';
\du 看角色列表,看到用户创建成功
pg2机器操作
#用postgres用户创建相关文件夹
su - postgres
mkdir /usr/local/postgresql/log
mkdir /home/postgres/arch
#(同步)pg_basebackup 工具将主库的数据备份拷贝到备库上
不需要初始化,直接从主库备份就行,如有data直接删掉或改名掉:
pg_basebackup -h pg1 -p 5432 -U repl -F p -P -D /usr/local/postgresql/data
参数解析:
-h,主库主机,-p,主库服务端口;-U,复制用户;
-F,p是默认输出格式,输出数据目录和表空间相同的布局,t表示tar格式输出;
-P,同--progress,显示进度;
-D,输出到指定目录
#编辑访问控制文件
vi /usr/local/postgresql/data/pg_hba.conf 添加如下配置
host all all 172.16.77.0/24 trust
host all all 192.168.0.0/16 trust
host replication repl 172.16.77.0/24 trust
#创建备库文件standby.signal
Postgresql12以后recovery.conf这个参数文件现在已经合并到postgresql.conf文件中了。如果PG检测到有recovery.conf的存在,PG将不会成功启动。
如果想让PG处于非主(non-primary)模式,您可以使用standby.signal这两个文件。
如果它在这个目录下面碰到standby.signal这个文件,它就把这个数据库变成备库。
vim /usr/local/postgresql/data/standby.signal
primary_conninfo = 'host=pg2 application_name=standby_pg1 port=5432 user=repl password=oracle options=''-c wal_sender_timeout=5000'''
standby_mode = on
#创建postgresql.auto.conf文件
[root@pg1 postgresql-12.2]# vim /usr/local/postgresql/data/postgresql.auto.conf
primary_conninfo = 'host=pg2 application_name=standby_pg1 port=5432 user=repl password=oracle options=''-c wal_sender_timeout=5000'''
#启动备库服务,以更新配置
pg_ctl -l /usr/local/postgresql/log/pglog.log start
到上面为止主从搭建完成,下面来验证一下
psql进入数据库
select pg_is_in_recovery(); 备库是t,主库是f
在主库中执行
\x 开启展示显示
select * from pg_stat_replication;
执行 pg_controldata | grep cluster
三.安装keepalived
两台机器执行
#yum install keepalived
pg1配置
$ vim /etc/keepalived/keepalived.conf
配置文件位置有可能不一样,可以通过whereis keepalived查找一下
! Configuration File for keepalived
#全局配置
global_defs {
notification_email {
acassen@firewall.loc
failover@firewall.loc
sysadmin@firewall.loc
}
notification_email_from Alexandre.Cassen@firewall.loc
smtp_server 172.16.77.171
smtp_connect_timeout 30
router_id 172.16.77.171
vrrp_skip_check_adv_addr
# vrrp_strict # 这行注释掉
vrrp_garp_interval 0
vrrp_gna_interval 0
}
#执行脚本配置
vrrp_script check_pg_alived {
script "/etc/keepalived/scripts/check_pg.sh" #检测脚本位置
interval 2
weight -5
fall 2
rise 1
}
vrrp_instance VI_1 {
state BACKUP # 主备一致,都是BACKUP
interface eth0 #网卡
virtual_router_id 51 # 主备需要一致
priority 97 # 优先级 ,主需要比备高 比如主是101 备可以是100
advert_int 1
authentication {
auth_type PASS # 主备需要一致
auth_pass 1111 # 主备需要一致
}
track_script {
check_pg_alived
}
virtual_ipaddress { # vip设置 主备一致
172.16.77.201
}
}
pg2配置
vim /etc/keepalived/keepalived.conf
! Configuration File for keepalived
#全局配置
global_defs {
notification_email {
acassen@firewall.loc
failover@firewall.loc
sysadmin@firewall.loc
}
notification_email_from Alexandre.Cassen@firewall.loc
smtp_server 172.16.77.172
smtp_connect_timeout 30
router_id 172.16.77.172
vrrp_skip_check_adv_addr
# vrrp_strict # 这行注释掉
vrrp_garp_interval 0
vrrp_gna_interval 0
}
#执行脚本配置
vrrp_script check_pg_alived {
script "/etc/keepalived/scripts/check_pg.sh" #检测脚本位置
interval 2
weight -5
fall 2
rise 1
}
vrrp_instance VI_1 {
state BACKUP # 主备一致,都是BACKUP
interface eth0 #网卡
virtual_router_id 51 # 主备需要一致
priority 96 # 优先级 ,主需要比备高 比如主是101 备可以是100
advert_int 1
authentication {
auth_type PASS # 主备需要一致
auth_pass 1111 # 主备需要一致
}
track_script {
check_pg_alived
}
virtual_ipaddress { # vip设置 主备一致
172.16.77.201
}
}
keepalived检测脚本(两个库一样)
[root@pg1 postgresql-12.2]# cat /etc/keepalived/scripts/check_pg.sh
#!/bin/bash
#判断pg是否活着
A=`ps -C postgres --no-header | wc -l`
#判断vip浮到哪里 这里的vip需要更改成自己设置的vip
B=`ip a | grep 172.16.77.201 | wc -l`
#判断是否是主库
C=`su - postgres -c "pg_controldata | grep cluster" | grep production | wc -l`
#判断是否是从库
D=`su - postgres -c "pg_controldata | grep cluster" | grep recovery | wc -l`
#判断主库是否有从库的同步进程
E=`ps -ef | grep postgres | grep 'walsender repl' | wc -l`
#如果pg死了,将消息写入日记并且关闭keepalived
if [ $A -eq 0 ];then
echo "`date "+%Y-%m-%d--%H:%M:%S"` postgresql stop so vip stop " >> /etc/keepalived/log/check_pg.log
systemctl stop keepalived
else
#判断出主ku挂了,vip漂移到了从,提升从的地位让他可读写
# if [ $B -eq 1 -a $C -eq 1 -a $D -eq 0 ];then
if [ $B -eq 1 -a $C -eq 0 -a $D -eq 1 ];then
su - postgres -c "pg_ctl promote -D /usr/local/postgresql/data" #重新加载pgsql使其可写
sed -i 's/primary_conninfo/\#primary_conninfo/g' /usr/local/postgresql/data/postgresql.auto.conf
echo "`date "+%Y-%m-%d--%H:%M:%S"` standby promote " >> /etc/keepalived/log/check_pg.log
fi
#判断出自己是主并且和从失去联系
if [ $B -eq 1 -a $C -eq 1 -a $D -eq 0 -a $E -eq 0 ];then
echo "`date "+%Y-%m-%d--%H:%M:%S"` can't find standby " >> /etc/keepalived/log/check_pg.log
fi
fi
两台机器都执行
mkdir /etc/keepalived/log
vim /etc/sysconfig/keepalived
把KEEPALIVED_OPTIONS="-D" 修改为KEEPALIVED_OPTIONS="-D -d -S 0"
vim /etc/rsyslog.conf
加入如下配置:
#keepalived -S 0
local0.* /var/log/keepalived.log
启动keepalived服务检查VIP情况(前提是两台服务器数据库已经启动)
[root@pg1 log]# systemctl start keepalived && systemctl enable keepalived
[root@pg1 log]# ip a
1: lo: <LOOPBACK,UP,LOWER_UP> mtu 65536 qdisc noqueue state UNKNOWN group default qlen 1000
link/loopback 00:00:00:00:00:00 brd 00:00:00:00:00:00
inet 127.0.0.1/8 scope host lo
valid_lft forever preferred_lft forever
inet6 ::1/128 scope host
valid_lft forever preferred_lft forever
2: eth0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP group default qlen 1000
link/ether fa:7d:49:74:fc:85 brd ff:ff:ff:ff:ff:ff
inet 172.16.77.171/24 brd 172.16.77.255 scope global noprefixroute eth0
valid_lft forever preferred_lft forever
inet 172.16.77.201/32 scope global eth0
valid_lft forever preferred_lft forever
inet6 fe80::f87d:49ff:fe74:fc85/64 scope link
valid_lft forever preferred_lft forever
四.检测
检测过程大家自己清楚,一定做好充分验证。
五.优化
通过以上方案,已经完成了postgres高可用,但是不完美
实现了vip漂移
实现了从数据库自动切换为主
但是还需要手工把宕机的主服务器启动,并且需要一系列操作把原主库切换为从库,稍微有些麻烦
下面我写了脚本,实现让原主库自动完成删除原数据,同步主数据库,作为从库启动数据库
脚本需要使用postgres用户执行
pg1自动切换脚本
[root@pg1 ~]# cat /home/postgres/postgresql-12.2/pqsql.sh
#!/bin/bash
#pg_ctl /usr/local/postgresql/log/pglog.log stop
rm -rf /usr/local/postgresql/data/* && echo "数据删除成功" || echo "数据删除发生错误" #也可以将数据备份
sleep 1
#同步主库数据,ip为主库ip
pg_basebackup -D /usr/local/postgresql/data/ -F p -X stream -v -P -h pg2 -U repl && echo “数据同步成功” || echo “数据同步发生错误”
sleep 2
cp /home/postgres/postgresql-12.2/standby.signal /usr/local/postgresql/data/ && echo "文件standby.signal修改成功" || echo "standby.signal修改失败"
sleep 0.5
rm -rf /usr/local/postgresql/data/postgresql.auto.conf
sleep 0.5
cp /home/postgres/postgresql-12.2/postgresql.auto.conf /usr/local/postgresql/data/ && echo "文件postgresql.auto.conf修改成功" || echo "postgresql.auto.conf修改失败"
sleep 0.5
pg_ctl -l /usr/local/postgresql/log/pglog.log start && echo "pgsql start success" || echo "pgsql start fial"
sleep 1
echo "提示:请去修改keepalived优先级(/etc/keepalived/keepalived.conf)!!!然后开启keepalived!!!"
执行脚本需要的关联文件
[root@pg1 ~]# cat /home/postgres/postgresql-12.2/standby.signal
primary_conninfo = 'host=pg2 application_name=standby_pg1 port=5432 user=repl password=oracle options=''-c wal_sender_timeout=5000'''
standby_mode = on
[root@pg1 ~]# cat /home/postgres/postgresql-12.2/postgresql.auto.conf
primary_conninfo = 'host=pg2 application_name=standby_pg1 port=5432 user=repl password=oracle options=''-c wal_sender_timeout=5000'''
上述所有文件属主都要为postgres用户
pg2自动切换脚本
[postgres@pg2 ~]$ cat /home/postgres/postgresql-12.2/pqsql.sh
#!/bin/bash
#pg_ctl /usr/local/postgresql/log/pglog.log stop
rm -rf /usr/local/postgresql/data/* && echo "数据删除成功" || echo "数据删除发生错误" #也可以将数据备份
sleep 1
#同步主库数据,ip为主库ip
pg_basebackup -D /usr/local/postgresql/data/ -F p -X stream -v -P -h pg1 -U repl && echo “数据同步成功” || echo “数据同步发生错误”
sleep 2
cp /home/postgres/postgresql-12.2/standby.signal /usr/local/postgresql/data/ && echo "文件standby.signal修改成功" || echo "standby.signal修改失败"
sleep 0.5
rm -rf /usr/local/postgresql/data/postgresql.auto.conf
sleep 0.5
cp /home/postgres/postgresql-12.2/postgresql.auto.conf /usr/local/postgresql/data/ && echo "文件postgresql.auto.conf修改成功" || echo "postgresql.auto.conf修改失败"
sleep 0.5
pg_ctl -l /usr/local/postgresql/log/pglog.log start && echo "pgsql start success" || echo "pgsql start fial"
sleep 1
echo "提示:请去修改keepalived优先级(/etc/keepalived/keepalived.conf)!!!然后开启keepalived!!!"
[postgres@pg2 ~]$ cat /home/postgres/postgresql-12.2/standby.signal
primary_conninfo = 'host=pg1 application_name=standby_pg2 port=5432 user=repl password=oracle options=''-c wal_sender_timeout=5000'''
#restore_command = 'cp /home/postgres/arch/%f %p'
#archive_cleanup_command = 'pg_archivecleanup /home/postgres/arch %r'
standby_mode = on
[postgres@pg2 ~]$ cat /home/postgres/postgresql-12.2/postgresql.auto.conf
primary_conninfo = 'host=pg1 application_name=standby_pg2 port=5432 user=repl password=oracle options=''-c wal_sender_timeout=5000'''
当主库挂了,从自动切换为主后,在原主库执行脚本
[postgres@pg2 ~]$ bash ~/postgresql-12.2/pqsql.sh
数据删除成功
pg_basebackup: initiating base backup, waiting for checkpoint to complete
pg_basebackup: checkpoint completed
pg_basebackup: write-ahead log start point: 0/23000028 on timeline 5
pg_basebackup: starting background WAL receiver
pg_basebackup: created temporary replication slot "pg_basebackup_23488"
24690/24690 kB (100%), 1/1 tablespace
pg_basebackup: write-ahead log end point: 0/23000100
pg_basebackup: waiting for background process to finish streaming ...
pg_basebackup: syncing data to disk ...
pg_basebackup: base backup completed
“数据同步成功”
文件standby.signal修改成功
文件postgresql.auto.conf修改成功
waiting for server to start.... done
server started
pgsql start success
提示:请去修改keepalived优先级(/etc/keepalived/keepalived.conf)!!!然后开启keepalived!!!
[postgres@pg2 ~]$ vim /etc/keepalived/keepalived.conf 更改优先级,使其小于新的主库
[postgres@pg2 ~]$ systemctl start keepalived