基于repmgr的pg库主备切换

系统版本centos7.9
postgresql版本13.4
repmgr版本5.2.1
主机:192.168.101.132 node1
备机:192.168.101.133 node2

1、安装repmgr(主备库都要安装)

[root@node1 ~]# tar -zxvf repmgr-5.2.1.tar.gz 
[root@node1 ~]# cd repmgr-5.2.1
[root@node1 repmgr-5.2.1]# ./configure --prefix=/home/postgresql
//这里编译会报错,因为没有flex
[root@node1 repmgr-5.2.1]# yum install -y flex
//安装之后在执行上述编译
[root@node1 repmgr-5.2.1]# make && make install

2、主库配置

2.1、基本信息

[root@node1 ~]# cat /etc/hosts
127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
::1         localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.101.132 node1
192.168.101.133 node2
[root@node1 ~]# cat /etc/profile
... ...
unset i
unset -f pathmunge
PATH=$PATH:/home/postgresql/bin
[root@node1 ~]# su - postgres
Last login: Wed Sep 22 17:19:08 CST 2021 on pts/0
[postgres@node1 ~]$ pwd
/home/postgres
[postgres@node1 ~]$ cat .pgpass 
192.168.101.132:5432:repmgr:repmgr:repmgr
192.168.101.133:5432:repmgr:repmgr:repmgr
[postgres@node1 ~]$ cat .bash_profile 
# .bash_profile

# Get the aliases and functions
if [ -f ~/.bashrc ]; then
    . ~/.bashrc
fi

# User specific environment and startup programs

PATH=$PATH:$HOME/.local/bin:$HOME/bin

export PATH

PGHOME=/home/postgresql

export PGHOME

PGDATA=$PGHOME/data

export PGDATA

PATH=$PATH:$HOME/.local/bin:$HOME/bin:$PGHOME/bin

export PATH
[postgres@node1 ~]$ vim /home/postgresql/data/postgresql.conf
unix_socket_directories='/home/postgresql/data'
wal_log_hints = on
archive_mode = on
archive_command = 'arch.sh %f %p' 
#archive_command = 'test ! -f /home/postgres/pgarch/%f && cp %p /home/postgres/pgarch/%f'

//arch.sh脚本位置及内容
[postgres@node1 pgdata]$ pwd
/home/postgres/pgdata
[postgres@node1 pgdata]$ ll
total 4
drwxrwxr-x. 2 postgres postgres   6 Sep 23 11:15 arch
-rw-rw-r--. 1 postgres postgres 178 Sep 23 11:13 arch.sh
[postgres@node1 pgdata]$ cat arch.sh //删除7天内的归档日志。
test ! -f /home/postgresql/pgdata/arch/$1 && cp --preserve=timestamps $2 /home/postgresql/pgdata/arch/$1 ; find /home/postgresql/pgdata/arch/ -type f -mtime +7 -exec rm -f {} \;

//创建复制用户
[postgres@node1 ~]$ createuser -s -P repmgr  //-P设置密码,与用户名一样即可,并且对应上述.pgpass文件中的配置信息
[postgres@node1 ~]$ createdb repmgr -O repmgr
[postgres@node1 ~]$ vim /home/postgresql/data/pg_hba.conf
# replication privilege.
host    replication     repmgr          192.168.101.0/24        trust

2.2、备库连接测试

[root@node2 ~]# psql 'host=192.168.101.132 user=repmgr dbname=repmgr connect_timeout=2'

2.3、创建repmgr.conf文件

[root@node1 ~]# vim /etc/repmgr.conf 
node_id=1
node_name=node1
conninfo='host=192.168.101.132 user=repmgr dbname=repmgr connect_timeout=2'
data_directory='/home/postgresql/data'
pg_bindir='/home/postgresql/bin'

2.4、注册主库

[postgres@node1 ~]# repmgr -f /etc/repmgr.conf primary register

2.5、查看信息

[postgres@node1 ~]# repmgr cluster show
[postgres@node1 ~]# psql -d repmgr
psql (13.4)
Type "help" for help.

repmgr=# SELECT * FROM repmgr.nodes;

3、备库配置

3.1、基本配置

[root@node2 ~]# cat /etc/hosts
127.0.0.1   localhost localhost.localdomain localhost4 localhost4.localdomain4
::1         localhost localhost.localdomain localhost6 localhost6.localdomain6
192.168.101.132 node1
192.168.101.133 node2
[root@node2 ~]# su - postgres
Last login: Wed Sep 22 17:19:08 CST 2021 on pts/0
[postgres@node2 ~]$ pwd
/home/postgres
[postgres@node2 ~]$ cat .pgpass 
192.168.101.132:5432:repmgr:repmgr:repmgr
192.168.101.133:5432:repmgr:repmgr:repmgr
[postgres@node2 ~]$ cat .bash_profile 
# .bash_profile

# Get the aliases and functions
if [ -f ~/.bashrc ]; then
    . ~/.bashrc
fi

# User specific environment and startup programs

PATH=$PATH:$HOME/.local/bin:$HOME/bin

export PATH

PGHOME=/home/postgresql

export PGHOME

PGDATA=$PGHOME/data

export PGDATA

PATH=$PATH:$HOME/.local/bin:$HOME/bin:$PGHOME/bin

export PATH

3.2、创建repmgr.conf文件

[root@node2 ~]# vim /etc/repmgr.conf 
node_id=2
node_name=node2
conninfo='host=192.168.101.133 user=repmgr dbname=repmgr connect_timeout=2'
data_directory='/home/postgresql/data'
pg_bindir='/home/postgresql/bin'

3.3、克隆主库data目录

[postgres@node2 ~]$ repmgr -h 192.168.101.132 -U repmgr -d repmgr -f /etc/repmgr.conf standby clone --dry-run
[postgres@node2 ~]$ repmgr -h 192.168.101.132 -U repmgr -d repmgr -f /etc/repmgr.conf standby clone

3.4、启动pg库并注册备库

[postgres@node2 ~]$ repmgr -f /etc/repmgr.conf standby register

3.5、查看信息

[postgres@node2 ~]# repmgr cluster show

4、配置主备ssh互信

4.1、主机

[postgres@node1 ~]$ ssh-keygen -t rsa //一直回车就好
[postgres@node1 ~]$ ssh-copy-id -i .ssh/id_rsa.pub postgres@node2
//这里报错是因为node2节点postgres用户没有设置密码
/bin/ssh-copy-id: INFO: Source of key(s) to be installed: ".ssh/id_rsa.pub"
/bin/ssh-copy-id: INFO: attempting to log in with the new key(s), to filter out any that are already installed
/bin/ssh-copy-id: INFO: 1 key(s) remain to be installed -- if you are prompted now it is to install the new keys
postgres@node2's password:
Permission denied, please try again.
postgres@node2's password:
[root@node2 ~]# passwd postgres //设置postgres密码
//成功再次执行上述命令
[postgres@node1 ~]$ ssh node2 date //不提示需要密码,返回时间即为正确

4.2、备机

[postgres@node2 ~]$ ssh-keygen -t rsa //一直回车就好
[postgres@node2 ~]$ ssh-copy-id -i .ssh/id_rsa.pub postgres@node1
//这里报错是因为node1节点postgres用户没有设置密码
/bin/ssh-copy-id: INFO: Source of key(s) to be installed: ".ssh/id_rsa.pub"
/bin/ssh-copy-id: INFO: attempting to log in with the new key(s), to filter out any that are already installed
/bin/ssh-copy-id: INFO: 1 key(s) remain to be installed -- if you are prompted now it is to install the new keys
postgres@node1's password:
Permission denied, please try again.
postgres@node1's password:
[root@node1 ~]# passwd postgres //设置postgres密码
//成功再次执行上述命令
[postgres@node2 ~]$ ssh node1 date //不提示需要密码,返回时间即为正确

5、切换测试

5.1、switchover

[postgres@node2 ~]$ repmgr standby switchover
NOTICE: executing switchover on node "node2" (ID: 2)
NOTICE: local node "node2" (ID: 2) will be promoted to primary; current primary "node1" (ID: 1) will be demoted to standby
NOTICE: stopping current primary node "node1" (ID: 1)
NOTICE: issuing CHECKPOINT
DETAIL: executing server command "pg_ctl  -D '/home/postgresql/data' -W -m fast stop"
INFO: checking for primary shutdown; 1 of 60 attempts ("shutdown_check_timeout")
INFO: checking for primary shutdown; 2 of 60 attempts ("shutdown_check_timeout")
INFO: checking for primary shutdown; 3 of 60 attempts ("shutdown_check_timeout")
NOTICE: current primary has been cleanly shut down at location 0/14000028
NOTICE: promoting standby to primary
DETAIL: promoting server "node2" (ID: 2) using "pg_ctl  -w -D '/home/postgresql/data' promote"
waiting for server to promote.... done
server promoted
NOTICE: waiting up to 60 seconds (parameter "promote_check_timeout") for promotion to complete
NOTICE: STANDBY PROMOTE successful
DETAIL: server "node2" (ID: 2) was successfully promoted to primary
INFO: local node 1 can attach to rejoin target node 2
DETAIL: local node's recovery point: 0/14000028; rejoin target node's fork point: 0/14000098
NOTICE: setting node 1's upstream to node 2
WARNING: unable to ping "host=192.168.101.132 user=repmgr dbname=repmgr connect_timeout=2"
DETAIL: PQping() returned "PQPING_NO_RESPONSE"
NOTICE: starting server using "pg_ctl  -w -D '/home/postgresql/data' start"
NOTICE: NODE REJOIN successful
DETAIL: node 1 is now attached to node 2
NOTICE: switchover was successful
DETAIL: node "node2" is now primary and node "node1" is attached as standby
NOTICE: STANDBY SWITCHOVER has completed successfully

5.2、promote

模仿主机宕机
[postgres@node1 ~]$ /home/postgresql/bin/pg_ctl stop -D /home/postgresql/data
waiting for server to shut down.... done
server stopped
[postgres@node2 ~]$ repmgr cluster show
ID | Name | Role    | Status        | Upstream | Location | Priority | Timeline | Connection string                                      
----+------+---------+---------------+----------+----------+----------+----------+------------------------------------------------------------------
1  | node1  | primary | ? unreachable | ?        | default  | 100      |          | host=192.168.101.132 user=repmgr dbname=repmgr connect_timeout=2
2  | node2  | standby |   running     | ? node1    | default  | 100      | 2        | host=192.168.101.133 user=repmgr dbname=repmgr connect_timeout=2

WARNING: following issues were detected
- unable to connect to node "node1" (ID: 1)
- node "node1" (ID: 1) is registered as an active primary but is unreachable
- unable to connect to node "node2" (ID: 2)'s upstream node "node1" (ID: 1)
- unable to determine if node "node2" (ID: 2) is attached to its upstream node "node1" (ID: 1)

HINT: execute with --verbose option to see connection error messages
[postgres@node2 ~]$ repmgr standby promote
NOTICE: promoting standby to primary
DETAIL: promoting server "node2" (ID: 2) using pg_promote()
NOTICE: waiting up to 60 seconds (parameter "promote_check_timeout") for promotion to complete
NOTICE: STANDBY PROMOTE successful
DETAIL: server "node2" (ID: 2) was successfully promoted to primary
[postgres@node2 ~]$ repmgr standby promote
ERROR: STANDBY PROMOTE can only be executed on a standby node
[postgres@node2 ~]$ repmgr cluster show
ID | Name | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                          
----+------+---------+-----------+----------+----------+----------+----------+------------------------------------------------------------------
1  | node1  | primary | - failed  | ?        | default  | 100      |          | host=192.168.101.132 user=repmgr dbname=repmgr connect_timeout=2
2  | node2  | primary | * running |          | default  | 100      | 4        | host=192.168.101.133 user=repmgr dbname=repmgr connect_timeout=2

WARNING: following issues were detected
- unable to connect to node "node1" (ID: 1)

HINT: execute with --verbose option to see connection error messages
[postgres@node1 ~]$ repmgr node rejoin -d 'host=192.168.101.133 port=5432 user=repmgr dbname=repmgr' --force-rewind --config-files=postgresql.conf,postgresql.auto.conf --verbose
INFO: looking for configuration file in /etc
INFO: configuration file found at: "/etc/repmgr.conf"
INFO: prerequisites for using pg_rewind are met
INFO: 2 files copied to "/tmp/repmgr-config-archive-node1"
NOTICE: executing pg_rewind
DETAIL: pg_rewind command is "/home/postgresql/bin/pg_rewind -D '/home/postgresql/data' --source-server='host=192.168.101.133 user=repmgr dbname=repmgr connect_timeout=2'"
NOTICE: 2 files copied to /home/postgresql/data
INFO: directory "/tmp/repmgr-config-archive-node1" deleted
NOTICE: setting node 1's upstream to node 2
WARNING: unable to ping "host=192.168.101.132 user=repmgr dbname=repmgr connect_timeout=2"
DETAIL: PQping() returned "PQPING_NO_RESPONSE"
NOTICE: starting server using "/home/postgresql/bin/pg_ctl  -w -D '/home/postgresql/data' start"
INFO: node "node1" (ID: 1) is pingable
INFO: node "node1" (ID: 1) has attached to its upstream node
NOTICE: NODE REJOIN successful
DETAIL: node 1 is now attached to node 2
[postgres@node1 ~]$ repmgr cluster show
ID | Name | Role    | Status    | Upstream | Location | Priority | Timeline | Connection string                                          
----+------+---------+-----------+----------+----------+----------+----------+------------------------------------------------------------------
1  | node1  | standby |   running | node2      | default  | 100      | 3        | host=192.168.101.132 user=repmgr dbname=repmgr connect_timeout=2
2  | node2  | primary | * running |          | default  | 100      | 4        | host=192.168.101.133 user=repmgr dbname=repmgr connect_timeout=2

切换成功,主备恢复

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值