I previously set up PostgreSQL high availability across three servers (primary, standby, and witness) using the postgresql + repmgr + pgbouncer combination. In that approach repmgr configures streaming replication, PostgreSQL's streaming replication keeps the standby in sync with the primary, and pgbouncer is bound to a third IP address to serve clients. When the primary goes down, the repmgr witness notifies the standby to run a failover script; besides the command that promotes the standby to primary, the script also rewrites the pgbouncer configuration and restarts pgbouncer, so the database connection behind pgbouncer points at the new primary while the client-facing IP stays the same. The advantages of that approach are a relatively simple configuration and a streaming-replication standby that can also serve some read-only queries. The drawback is that once the standby has been promoted, the old primary must be rebuilt as a standby with repmgr and its repmgr monitoring process restarted, so only one automatic failover is possible before manual intervention is needed (that rebuild could also be scripted for fully automatic switching, but I prefer to do the promotion by hand to avoid data loss from repeated flip-flopping). Over the last couple of days I experimented with pacemaker + corosync + DRBD on CentOS 6.4 x86_64 as another way to make PostgreSQL highly available.
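As a rough illustration of that kind of failover script (a minimal sketch only; the repmgr.conf path, the pgbouncer.ini location, the host names and the ssh step are assumptions for illustration, not the script actually used in that setup):
#!/bin/bash
# failover.sh - invoked by repmgr on the standby when the primary fails
# 1. promote the local standby to primary
repmgr -f /etc/repmgr/repmgr.conf standby promote
# 2. point pgbouncer at the new primary and restart it
#    (pgbouncer-host, old-primary and new-primary are placeholder names)
ssh root@pgbouncer-host \
  "sed -i 's/host=old-primary/host=new-primary/' /etc/pgbouncer/pgbouncer.ini && service pgbouncer restart"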
1. Prepare a local yum repository on both nodes (pointing at the install media):
[root@pgtest3 ~]# more /etc/yum.repos.d/CentOS-Base.repo
[c6-media]
name=CentOS-$releasever - Media
baseurl=file:///media/
gpgcheck=0
enabled=1
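Since the baseurl points to file:///media/, this assumes the CentOS install DVD is mounted there; for example (commands shown only as a sketch of that assumption):
[root@pgtest3 ~]# mount /dev/cdrom /media
[root@pgtest3 ~]# yum clean all && yum makecache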
With these preparations in place, the required software can be installed.
2. Install corosync (required on both nodes)
corosync is a cluster messaging component (derived from OpenAIS, similar in role to heartbeat) that handles the low-level message passing within the cluster; pacemaker runs on top of it as the cluster resource manager.
Edit the configuration file:
[root@pgtest3 corosync]# vim /etc/corosync/corosync.conf
[root@pgtest3 corosync]# cat corosync.conf
# Please read the corosync.conf.5 manual page
compatibility: whitetank
totem {
version: 2
secauth: on
threads: 0
interface {
ringnumber: 0
bindnetaddr: 192.168.1.0 # network segment used for the heartbeat; ideally the heartbeat runs over a dedicated link isolated from the LAN, but since these are VMs no separate private network is used here
mcastaddr: 226.94.1.1 # multicast address; this can be left at the default
mcastport: 5405
ttl: 1
}
}
logging {
fileline: off
to_stderr: no
to_logfile: yes
to_syslog: yes
logfile: /var/log/cluster/corosync.log # log output file
debug: off
timestamp: on
logger_subsys {
subsys: AMF
debug: off
}
}
amf {
mode: disabled
}
service {
ver: 0
name: pacemaker # the upper-layer cluster resource manager; set to pacemaker here
}
aisexec {
user: root # user
group: root # group
}
Note: the parameters commented above (bindnetaddr, mcastaddr, the logging targets, and the service and aisexec blocks) are the ones that need to be set for your environment.
[root@pgtest3 corosync]# cd /etc/corosync
[root@pgtest3 corosync]# corosync-keygen
Then copy the configuration file and the authentication key to pgtest4:
[root@pgtest3 corosync]# scp authkey corosync.conf pgtest4:/etc/corosync/
3. Install and configure Pacemaker (required on both nodes)
[root@pgtest3 corosync]# yum install -y pacemaker
After installing pacemaker, also install crmsh, the command-line cluster management tool:
[root@pgtest3 ~]# wget http://download.openSUSE.org/repositories/network:/ha-clustering:/Stable/CentOS_CentOS-6/x86_64/crmsh-2.0+git46-1.1.x86_64.rpm
[root@pgtest3 ~]# rpm -ivh crmsh-2.0+git46-1.1.x86_64.rpm
warning: crmsh-2.0+git46-1.1.x86_64.rpm: Header V3 RSA/SHA1 Signature, key ID 17280ddf: NOKEY
error: Failed dependencies:
pssh is needed by crmsh-2.0+git46-1.1.x86_64
redhat-rpm-config is needed by crmsh-2.0+git46-1.1.x86_64
Install the dependencies (both packages are available on the local install media):
[root@pgtest3 ~]# yum install pssh redhat-rpm-config
[root@pgtest3 ~]# rpm -ivh crmsh-2.0+git46-1.1.x86_64.rpm
warning: crmsh-2.0+git46-1.1.x86_64.rpm: Header V3 RSA/SHA1 Signature, key ID 17280ddf: NOKEY
Preparing... ########################################### [100%]
1:crmsh ########################################### [100%]
After a successful installation, the crm command enters the crmsh shell.
Start corosync. Because the service block was defined in corosync.conf, pacemaker is started as part of corosync:
[root@pgtest3 ~]# service corosync start
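To confirm the stack came up cleanly (a quick sanity check, not part of the original steps), the ring status, the log and the cluster membership can be inspected:
[root@pgtest3 ~]# corosync-cfgtool -s                              # ring status should report no faults
[root@pgtest3 ~]# grep -i TOTEM /var/log/cluster/corosync.log      # membership/ring messages
[root@pgtest3 ~]# crm_mon -1                                       # both nodes should show as Online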
4. Install DRBD
[root@pgtest3 ~]# yum -y install drbd84 kmod-drbd84
Configure DRBD:
[root@pgtest3 ~]# cat /etc/drbd.conf
# You can find an example in /usr/share/doc/drbd.../drbd.conf.example
include "drbd.d/global_common.conf";
include "drbd.d/*.res";
[root@pgtest3 ~]# ls /etc/drbd.d/
global_common.conf r0.res
global_common.conf holds the global settings; r0.res is the custom resource definition.
[root@pgtest3 ~]# cat /etc/drbd.d/global_common.conf
global {
usage-count no;
# minor-count dialog-refresh disable-ip-verification
}
common {
handlers {
pri-on-incon-degr "/usr/lib/drbd/notify-pri-on-incon-degr.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
pri-lost-after-sb "/usr/lib/drbd/notify-pri-lost-after-sb.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
local-io-error "/usr/lib/drbd/notify-io-error.sh; /usr/lib/drbd/notify-emergency-shutdown.sh; echo o > /proc/sysrq-trigger ; halt -f";
# fence-peer "/usr/lib/drbd/crm-fence-peer.sh";
# split-brain "/usr/lib/drbd/notify-split-brain.sh root";
# out-of-sync "/usr/lib/drbd/notify-out-of-sync.sh root";
# before-resync-target "/usr/lib/drbd/snapshot-resync-target-lvm.sh -p 15 -- -c 16k";
# after-resync-target /usr/lib/drbd/unsnapshot-resync-target-lvm.sh;
}
startup {
# wfc-timeout degr-wfc-timeout outdated-wfc-timeout wait-after-sb
}
options {
# cpu-mask on-no-data-accessible
}
disk {
# size max-bio-bvecs on-io-error fencing disk-barrier disk-flushes
# disk-drain md-flushes resync-rate resync-after al-extents
# c-plan-ahead c-delay-target c-fill-target c-max-rate
# c-min-rate disk-timeout
on-io-error detach; # on an I/O error, detach the backing device from the resource
}
net {
# protocol timeout max-epoch-size max-buffers unplug-watermark
# connect-int ping-int sndbuf-size rcvbuf-size ko-count
# allow-two-primaries cram-hmac-alg shared-secret after-sb-0pri
# after-sb-1pri after-sb-2pri always-asbp rr-conflict
# ping-timeout data-integrity-alg tcp-cork on-congestion
# congestion-fill congestion-extents csums-alg verify-alg
# use-rle
cram-hmac-alg "sha1"; # hash algorithm used for peer authentication
shared-secret "mydrbdlab"; # shared secret used for peer authentication
}
}
[root@pgtest3 ~]# cat /etc/drbd.d/r0.res
resource r0{
on pgtest3{
device /dev/drbd1;
disk /dev/sdb1;
address 10.1.1.12:7788;
meta-disk internal;
}
on pgtest4{
device /dev/drbd1;
disk /dev/sdb1;
address 10.1.1.13:7788;
meta-disk internal;
}
}
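Before creating the metadata, the resource definition can be validated by letting drbdadm parse it back; note that the names in the on stanzas must match each node's uname -n (a quick check, not part of the original steps):
[root@pgtest3 ~]# uname -n          # must match the "on pgtest3" stanza
pgtest3
[root@pgtest3 ~]# drbdadm dump r0   # prints the parsed resource if the syntax is valid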
Create the DRBD metadata (run on both nodes):
[root@pgtest3 drbd.d]# drbdadm create-md r0
Writing meta data...
initializing activity log
NOT initializing bitmap
New drbd meta data block successfully created.
Create the block device node (run on both nodes):
[root@pgtest3 drbd.d]# mknod /dev/drbd1 b 147 0
Start the DRBD service (run on both nodes):
[root@pgtest3 drbd.d]# service drbd start
Start the initial data sync, promoting this node to primary (run on one node only):
[root@pgtest3 drbd.d]# drbdadm -- --overwrite-data-of-peer primary r0
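The initial sync can take a while; progress can be followed via /proc/drbd (shown as a suggestion; wait until the disk states read UpToDate/UpToDate):
[root@pgtest3 drbd.d]# watch -n2 cat /proc/drbd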
Format the device and mount it:
[root@pgtest3 drbd.d]# mkfs.ext4 /dev/drbd1
[root@pgtest3 ~]# mkdir /r0
[root@pgtest3 ~]# mount /dev/drbd1 /r0/
[root@pgtest3 ~]# /etc/init.d/drbd status
drbd driver loaded OK; device status:
version: 8.4.4 (api:1/proto:86-101)
GIT-hash: 74402fecf24da8e5438171ee8c19e28627e1c98a build by root@pgtest3, 2014-06-08 16:17:34
m:res cs ro ds p mounted fstype
1:r0 Connected Primary/Secondary UpToDate/UpToDate C
To mount the device on the other node instead:
[root@pgtest3 ~]# umount /r0/
[root@pgtest3 ~]# drbdadm secondary r0
[root@pgtest4 ~]# drbdadm primary r0
[root@pgtest4 ~]# mkdir /r0
[root@pgtest4 ~]# mount /dev/drbd1 /r0/
Disable DRBD autostart (the cluster will manage it):
[root@pgtest3 init.d]# chkconfig drbd off
[root@pgtest4 init.d]# chkconfig drbd off
5. Install PostgreSQL on both nodes, place the PGDATA directory under the DRBD-backed mount point /r0, and register the init script (its autostart is disabled below because the cluster manages it):
[pg@pgtest3 data]$ cp /opt/soft/postgresql-9.3.4/contrib/start-scripts/linux /etc/init.d/postgresql
Edit the script to set PGDATA and the other parameters.
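The variable names below are the ones defined in that contrib start script; the values are what this setup would use, assuming PostgreSQL is installed under /opt/pgsql with its data directory on the DRBD volume:
[root@pgtest3 ~]# vim /etc/init.d/postgresql
prefix=/opt/pgsql            # installation prefix
PGDATA="/r0/data"            # data directory on the DRBD-backed filesystem
PGUSER=pg                    # OS user that runs the server
PGLOG="$PGDATA/serverlog"    # server log file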
[root@pgtest3 init.d]# chkconfig --add postgresql
[root@pgtest3 init.d]# chkconfig postgresql off
Configure resource management with crmsh.
DRBD must be stopped on both nodes first:
[root@pgtest3 ~]# service drbd stop
Stopping all DRBD resources: .
[root@pgtest3 ~]# chkconfig drbd off
6. Enter crm and configure the resources.
Add the DRBD resource:
[root@pgtest3 ~]# crm
Cannot change active directory to /var/lib/pacemaker/cores/root: No such file or directory (2)
crm(live)# configure
crm(live)configure# property stonith-enabled=false #disable STONITH; no external fencing device is used
crm(live)configure# property no-quorum-policy=ignore #quorum policy; with only two nodes, quorum loss is ignored
crm(live)configure# verify #check that the configuration is valid
Cannot change active directory to /var/lib/pacemaker/cores/root: No such file or directory (2)
Cannot change active directory to /var/lib/pacemaker/cores/root: No such file or directory (2)
Cannot change active directory to /var/lib/pacemaker/cores/root: No such file or directory (2)
#if this error appears, simply create the /var/lib/pacemaker/cores/root directory
crm(live)configure# commit #commit, writing the changes into the CIB
crm(live)configure# primitive postgresql ocf:heartbeat:drbd params drbd_resource=r0 op start timeout=240 op stop timeout=100 op monitor role=Master interval=20 timeout=30 op monitor role=Slave interval=30 timeout=30 #define the DRBD resource; the resource name postgresql is arbitrary and could be anything else
crm(live)configure# ms ms_postgresql postgresql meta master-max=1 master-node-max=1 clone-max=2 clone-node-max=1 notify=true #define the master/slave resource; ms_postgresql is its name and postgresql is the DRBD resource it manages
crm(live)configure# show #show the configuration
node pgtest3
node pgtest4
primitive postgresql drbd \
params drbd_resource=r0 \
op start timeout=240 interval=0 \
op stop timeout=100 interval=0 \
op monitor role=Master interval=20 timeout=30 \
op monitor role=Slave interval=30 timeout=30
ms ms_postgresql postgresql \
meta master-max=1 master-node-max=1 clone-max=2 clone-node-max=1 notify=true
property cib-bootstrap-options: \
dc-version=1.1.8-7.el6-394e906 \
cluster-infrastructure="classic openais (with plugin)" \
expected-quorum-votes=2 \
stonith-enabled=false \
no-quorum-policy=ignore
crm(live)configure# quit
There are changes pending. Do you want to commit them (y/n)? y #prompts to save; alternatively run commit before quitting
bye
[root@pgtest3 ~]# crm status
Cannot change active directory to /var/lib/pacemaker/cores/root: No such file or directory (2)
Last updated: Mon Jun 9 14:14:31 2014
Last change: Mon Jun 9 14:10:12 2014 via cibadmin on pgtest3
Stack: classic openais (with plugin)
Current DC: pgtest3 - partition with quorum
Version: 1.1.8-7.el6-394e906
2 Nodes configured, 2 expected votes
2 Resources configured.
Online: [ pgtest3 pgtest4 ]
Master/Slave Set: ms_postgresql [postgresql]
postgresql:0 (ocf::heartbeat:drbd): Slave pgtest3 (unmanaged) FAILED
postgresql:1 (ocf::heartbeat:drbd): Slave pgtest4 (unmanaged) FAILED
Failed actions:
postgresql_stop_0 (node=pgtest3, call=18, rc=5, status=complete): not installed
postgresql_stop_0 (node=pgtest4, call=18, rc=5, status=complete): not installed
This step kept failing and I could not find the cause; rebooting both machines cleared it.
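If the same failed-action state shows up, clearing the failure records with a resource cleanup may also help before resorting to a reboot (a suggestion, not something verified in this test):
[root@pgtest3 ~]# crm resource cleanup postgresql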
Add the filesystem resource:
[root@pgtest3 ~]# crm
Cannot change active directory to /var/lib/pacemaker/cores/root: No such file or directory (2)
crm(live)# configure
crm(live)configure# primitive pgstore ocf:heartbeat:Filesystem params device=/dev/drbd1 directory=/r0 fstype=ext4 op start timeout=60 op stop timeout=60
crm(live)configure# verify
crm(live)configure# colocation pgstore_with_ms_postgresql inf: pgstore ms_postgresql:Master #a colocation constraint: keep the pgstore resource on the same node as the ms_postgresql master
crm(live)configure# order pgstore_after_ms_postgresql mandatory: ms_postgresql:promote pgstore:start
#an ordering constraint: first promote ms_postgresql to master on the node, then start pgstore, i.e. mount the DRBD filesystem
crm(live)configure# verify
crm(live)configure# commit
crm(live)configure# show
node pgtest3
node pgtest4
primitive pgstore Filesystem \
params device="/dev/drbd1" directory="/r0" fstype=ext4 \
op start timeout=60 interval=0 \
op stop timeout=60 interval=0
primitive postgresql drbd \
params drbd_resource=r0 \
op start timeout=240 interval=0 \
op stop timeout=100 interval=0 \
op monitor role=Master interval=20 timeout=30 \
op monitor role=Slave interval=30 timeout=30
ms ms_postgresql postgresql \
meta master-max=1 master-node-max=1 clone-max=2 clone-node-max=1 notify=true
colocation pgstore_with_ms_postgresql inf: pgstore ms_postgresql:Master
order pgstore_after_ms_postgresql Mandatory: ms_postgresql:promote pgstore:start
property cib-bootstrap-options: \
dc-version=1.1.8-7.el6-394e906 \
cluster-infrastructure="classic openais (with plugin)" \
expected-quorum-votes=2 \
stonith-enabled=false \
no-quorum-policy=ignore
Add the PostgreSQL resource:
crm(live)configure# primitive pgresource lsb:postgresql #add the PostgreSQL init script as an LSB resource
crm(live)configure# colocation pg_with_pgstore inf: pgresource pgstore
#run the postgresql service on the same node as pgstore, i.e. the DRBD filesystem
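The colocation alone does not enforce start order; an ordering constraint in the same style as above would make sure the filesystem is mounted before PostgreSQL starts (shown only as a sketch, it is not part of the configuration used here):
crm(live)configure# order pg_after_pgstore mandatory: pgstore:start pgresource:start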
Add the VIP resource:
crm(live)configure# primitive vip ocf:heartbeat:IPaddr params ip=10.1.1.200 nic=eth1 cidr_netmask=255.255.255.0 #add a VIP of 10.1.1.200 on the eth1 interface
crm(live)configure# colocation vip_with_ms_pgdrbd inf: ms_postgresql:Master vip
#keep the VIP on the same node as the ms_postgresql master
crm(live)configure# verify
crm(live)configure# commit
Check the cluster status:
[root@pgtest4 ~]# crm status
Last updated: Mon Jun 9 21:43:58 2014
Last change: Mon Jun 9 20:29:54 2014 via cibadmin on pgtest4
Stack: classic openais (with plugin)
Current DC: pgtest4 - partition with quorum
Version: 1.1.8-7.el6-394e906
2 Nodes configured, 2 expected votes
5 Resources configured.
Online: [ pgtest3 pgtest4 ]
Master/Slave Set: ms_postgresql [postgresql]
Masters: [ pgtest4 ]
Slaves: [ pgtest3 ]
pgresource (lsb:postgresql): Started pgtest4
pgstore (ocf::heartbeat:Filesystem): Started pgtest4
vip (ocf::heartbeat:IPaddr): Started pgtest4
7. Test the failover
First check which node is currently the primary:
[root@pgtest3 ~]# ps -ef|grep post
pg 2876 1 0 08:42 ? 00:00:00 /opt/pgsql/bin/postmaster -D /r0/data
pg 2882 2876 0 08:42 ? 00:00:00 postgres: logger process
pg 2884 2876 0 08:42 ? 00:00:00 postgres: checkpointer process
pg 2885 2876 0 08:42 ? 00:00:05 postgres: writer process
pg 2886 2876 0 08:42 ? 00:00:00 postgres: wal writer process
pg 2887 2876 0 08:42 ? 00:00:00 postgres: autovacuum launcher process
pg 2888 2876 0 08:42 ? 00:00:00 postgres: archiver process
pg 2889 2876 0 08:42 ? 00:00:00 postgres: stats collector process
root 5866 3302 0 13:23 pts/2 00:00:00 grep post
At this point, connecting to the database works normally:
[root@pgtest3 ~]# psql -h 10.1.1.200 -U pg postgres -p 5433
postgres=# SELECT count(*) from tbl_cost_align ;
count
-------
5
(1 row)
[root@pgtest3 ~]# crm
crm(live)# node
crm(live)node# standby pgtest3
Queries from the client block briefly during the switchover and then succeed again; the cluster status shows that the resources have moved to pgtest4:
[root@pgtest4 ~]# crm status
Last updated: Tue Jun 10 13:30:06 2014
Last change: Tue Jun 10 13:24:11 2014 via crm_attribute on pgtest3
Stack: classic openais (with plugin)
Current DC: pgtest3 - partition with quorum
Version: 1.1.8-7.el6-394e906
2 Nodes configured, 2 expected votes
5 Resources configured.
Node pgtest3: standby
Online: [ pgtest4 ]
Master/Slave Set: ms_postgresql [postgresql]
Masters: [ pgtest4 ]
Stopped: [ postgresql:1 ]
pgresource (lsb:postgresql): Started pgtest4
pgstore (ocf::heartbeat:Filesystem): Started pgtest4
vip (ocf::heartbeat:IPaddr): Started pgtest4
[root@pgtest4 ~]# ps -ef|grep post
pg 25610 1 0 13:24 ? 00:00:00 /opt/pgsql/bin/postmaster -D /r0/data
pg 25611 25610 0 13:24 ? 00:00:00 postgres: logger process
pg 25614 25610 0 13:24 ? 00:00:00 postgres: checkpointer process
pg 25615 25610 0 13:24 ? 00:00:00 postgres: writer process
pg 25616 25610 0 13:24 ? 00:00:00 postgres: wal writer process
pg 25617 25610 0 13:24 ? 00:00:00 postgres: autovacuum launcher process
pg 25618 25610 0 13:24 ? 00:00:00 postgres: archiver process
pg 25619 25610 0 13:24 ? 00:00:00 postgres: stats collector process
root 25684 25622 0 13:24 pts/1 00:00:00 grep post
[root@pgtest4 ~]# df -h
Filesystem Size Used Avail Use% Mounted on
/dev/sda1 7.7G 6.6G 771M 90% /
tmpfs 499M 18M 481M 4% /dev/shm
/dev/sda3 7.6G 4.8G 2.5G 66% /postgres
/dev/drbd1 5.0G 575M 4.2G 12% /r0
[root@pgtest4 ~]# ifconfig
eth0 Link encap:Ethernet HWaddr 08:00:27:A5:50:BA
inet addr:10.0.2.15 Bcast:10.0.2.255 Mask:255.255.255.0
inet6 addr: fe80::a00:27ff:fea5:50ba/64 Scope:Link
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:4 errors:0 dropped:0 overruns:0 frame:0
TX packets:18 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:791 (791.0 b) TX bytes:1563 (1.5 KiB)
eth1 Link encap:Ethernet HWaddr 08:00:27:34:CC:C4
inet addr:10.1.1.13 Bcast:10.1.1.255 Mask:255.255.255.0
inet6 addr: fe80::a00:27ff:fe34:ccc4/64 Scope:Link
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
RX packets:147105 errors:0 dropped:0 overruns:0 frame:0
TX packets:95266 errors:0 dropped:0 overruns:0 carrier:0
collisions:0 txqueuelen:1000
RX bytes:21927610 (20.9 MiB) TX bytes:13961871 (13.3 MiB)
eth1:0 Link encap:Ethernet HWaddr 08:00:27:34:CC:C4
inet addr:10.1.1.200 Bcast:10.1.1.255 Mask:255.255.255.0
UP BROADCAST RUNNING MULTICAST MTU:1500 Metric:1
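After the standby test, pgtest3 can be brought back into the cluster, where it should rejoin as the DRBD secondary:
[root@pgtest3 ~]# crm node online pgtest3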
Similarly, you can test by rebooting a node outright or by bringing down its network interface; in both cases the services fail over quickly.
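For example, on whichever node currently holds the resources (illustrative fault injection only; run with care in a test environment):
[root@pgtest4 ~]# ifdown eth1    # drop the interface carrying the service IP
[root@pgtest4 ~]# reboot         # or reboot the node outright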
Summary:
Building PostgreSQL high availability with pacemaker + corosync + DRBD is somewhat more complex to configure, and only one node serves traffic while the other sits idle as a disaster-recovery copy, which is a bit wasteful on hardware; still, it is simpler than an RHCS-based setup. Its main advantage is that when the primary fails, the roles can switch back and forth repeatedly without manual intervention, which is better than the repmgr + pgbouncer approach. For production use, I would recommend this scheme plus one additional streaming-replication standby, which gives the data better protection.