1
2
3
4
5
6
7
|
######NOD1节点执行
sed -i 's@\(HOSTNAME=\).*@\1nod1.allen.com@g' /etc/sysconfig/network
hostname nod1.allen.com
######NOD2节点执行
sed -i 's@\(HOSTNAME=\).*@\1nod2.allen.com@g' /etc/sysconfig/network
hostname nod2.allen.com
注释:仅修改配置文件需要重启系统才能生效;这里先修改配置文件,再执行hostname命令立即修改主机名称,因此可以不用重启
|
1
2
3
4
5
|
######在NOD1与NOD2节点执行(注意:">"会覆盖/etc/hosts的原有内容,如需保留localhost等已有条目请改用">>"追加)
cat > /etc/hosts << EOF
192.168.137.225 nod1.allen.com nod1
192.168.137.222 nod2.allen.com nod2
EOF
|
1
2
|
######在NOD1与NOD2节点安装
rpm -ivh epel-release-6-8.noarch.rpm
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
######在NOD1节点上创建分区,分区大小必须与NOD2节点保持一样
[root@nod1 ~]
# fdisk /dev/sda
Command (m
for
help): n
Command action
e extended
p primary partition (1-4)
p
Partition number (1-4): 3
First cylinder (7859-15665, default 7859):
Using default value 7859
Last cylinder, +cylinders or +size{K,M,G} (7859-15665, default 15665): +2G
Command (m
for
help): w
[root@nod1 ~]
# partx /dev/sda #让内核重新读取分区
######查看内核有没有识别分区,如果没有需要重新启动,这里没有识别需要重启系统
[root@nod1 ~]
# cat /proc/partitions
major minor
#blocks name
8 0 125829120 sda
8 1 204800 sda1
8 2 62914560 sda2
253 0 20971520 dm-0
253 1 2097152 dm-1
253 2 10485760 dm-2
253 3 20971520 dm-3
[root@nod1 ~]
# reboot
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
|
######在NOD2节点上创建分区,分区大小必须与NOD1节点保持一样
[root@nod2 ~]
# fdisk /dev/sda
Command (m
for
help): n
Command action
e extended
p primary partition (1-4)
p
Partition number (1-4): 3
First cylinder (7859-15665, default 7859):
Using default value 7859
Last cylinder, +cylinders or +size{K,M,G} (7859-15665, default 15665): +2G
Command (m
for
help): w
[root@nod2 ~]
# partx /dev/sda #让内核重新读取分区
######查看内核有没有识别分区,如果没有需要重新启动,这里没有识别需要重启系统
[root@nod2 ~]
# cat /proc/partitions
major minor
#blocks name
8 0 125829120 sda
8 1 204800 sda1
8 2 62914560 sda2
253 0 20971520 dm-0
253 1 2097152 dm-1
253 2 10485760 dm-2
253 3 20971520 dm-3
[root@nod2 ~]
# reboot
|
1
2
3
4
5
6
7
8
|
######NOD1
[root@nod1 ~]
# ls drbd-*
drbd-8.4.3-33.el6.x86_64.rpm drbd-kmdl-2.6.32-358.el6-8.4.3-33.el6.x86_64.rpm
[root@nod1 ~]
# yum -y install drbd-*.rpm
######NOD2
[root@nod2 ~]
# ls drbd-*
drbd-8.4.3-33.el6.x86_64.rpm drbd-kmdl-2.6.32-358.el6-8.4.3-33.el6.x86_64.rpm
[root@nod2 ~]
# yum -y install drbd-*.rpm
|
1
2
3
4
5
6
7
8
9
10
|
ll /etc/drbd.conf;ll /etc/drbd.d/
-rw-r--r-- 1 root root 133 May 14 21:12 /etc/drbd.conf #主配置文件
total 4
-rw-r--r-- 1 root root 1836 May 14 21:12 global_common.conf #全局配置文件
######查看主配置文件内容
cat /etc/drbd.conf
######主配置文件中包含了全局配置文件及"drbd.d/"目录下以.res结尾的文件
# You can find an example in /usr/share/doc/drbd.../drbd.conf.example
include "drbd.d/global_common.conf";
include "drbd.d/*.res";
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
|
[root@nod1 ~]
#vim /etc/drbd.d/global_common.conf
global {
usage-count no;
#是否参加DRBD使用统计,默认为yes
# minor-count dialog-refresh disable-ip-verification
}
common {
protocol C;
#使用DRBD的同步协议
handlers {
# These are EXAMPLE handlers only.
# They may have severe implications,
# like hard resetting the node under certain circumstances.
# Be careful when chosing your poison.
pri-on-incon-degr "/usr/lib/drbd/notify-pri-on-incon-degr.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
pri-lost-after-sb "/usr/lib/drbd/notify-pri-lost-after-sb.sh; /usr/lib/drbd/notify-emergency-reboot.sh; echo b > /proc/sysrq-trigger ; reboot -f";
local-io-error "/usr/lib/drbd/notify-io-error.sh; /usr/lib/drbd/notify-emergency-shutdown.sh; echo o > /proc/sysrq-trigger ; halt -f";
# fence-peer "/usr/lib/drbd/crm-fence-peer.sh";
# split-brain "/usr/lib/drbd/notify-split-brain.sh root";
# out-of-sync "/usr/lib/drbd/notify-out-of-sync.sh root";
# before-resync-target "/usr/lib/drbd/snapshot-resync-target-lvm.sh -p 15 -- -c 16k";
# after-resync-target /usr/lib/drbd/unsnapshot-resync-target-lvm.sh;
}
startup {
# wfc-timeout degr-wfc-timeout outdated-wfc-timeout wait-after-sb
}
options {
# cpu-mask on-no-data-accessible
}
disk {
on-io-error detach;
#配置I/O错误处理策略为分离
# size max-bio-bvecs on-io-error fencing disk-barrier disk-flushes
# disk-drain md-flushes resync-rate resync-after al-extents
# c-plan-ahead c-delay-target c-fill-target c-max-rate
# c-min-rate disk-timeout
}
net {
cram-hmac-alg "sha1"; #设置对等节点之间认证所使用的HMAC算法
shared-secret "allendrbd"; #设置对等节点之间认证所使用的共享密钥
# protocol timeout max-epoch-size max-buffers unplug-watermark
# connect-int ping-int sndbuf-size rcvbuf-size ko-count
# allow-two-primaries cram-hmac-alg shared-secret after-sb-0pri
# after-sb-1pri after-sb-2pri always-asbp rr-conflict
# ping-timeout data-integrity-alg tcp-cork on-congestion
# congestion-fill congestion-extents csums-alg verify-alg
# use-rle
}
syncer {
rate 1024M;
#设置主备节点重新同步(resync)时的速率上限;注意:syncer段是DRBD 8.3的旧语法,在8.4中对应的新写法是在disk段中设置resync-rate
}
}
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
|
[root@nod1 ~]
# vim /etc/drbd.d/drbd.res
resource drbd {
on nod1.allen.com {
#每个主机的说明以on开头,后面是主机名称
device /dev/drbd0; #DRBD设备名称
disk /dev/sda3; #drbd0使用的磁盘分区为"sda3"
address 192.168.137.225:7789;
#设置DRBD监听地址与端口
meta-disk internal;
}
on nod2.allen.com {
device /dev/drbd0;
disk /dev/sda3;
address 192.168.137.222:7789;
meta-disk internal;
}
}
|
1
2
3
4
5
6
7
8
|
[root@nod1 ~]
# scp /etc/drbd.d/{global_common.conf,drbd.res} nod2:/etc/drbd.d/
The authenticity of host 'nod2 (192.168.137.222)' can't be established.
RSA key fingerprint is 29:d3:28:85:20:a1:1f:2a:11:e5:88:cd:25:d0:95:c7.
Are you sure you want to continue connecting (yes/no)? yes
Warning: Permanently added 'nod2' (RSA) to the list of known hosts.
root@nod2's password:
global_common.conf 100% 1943 1.9KB/s 00:00
drbd.res 100% 318 0.3KB/s 00:00
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
|
######在NOD1节点上初始化资源并启动服务
[root@nod1 ~]
# drbdadm create-md drbd
Writing meta data...
initializing activity log
NOT initializing bitmap
lk_bdev_save(/var/lib/drbd/drbd-minor-0.lkbd) failed: No such file or directory
New drbd meta data block successfully created. #提示已经创建成功
lk_bdev_save(/var/lib/drbd/drbd-minor-0.lkbd) failed: No such file or directory
######启动服务
[root@nod1 ~]
# service drbd start
Starting DRBD resources: [
create res: drbd
prepare disk: drbd
adjust disk: drbd
adjust net: drbd
]
..........
***************************************************************
DRBD's startup script waits
for
the peer node(s) to appear.
- In
case
this node was already a degraded cluster before the
reboot the timeout is 0 seconds. [degr-wfc-timeout]
- If the peer was available before the reboot the timeout will
expire after 0 seconds. [wfc-timeout]
(These values are
for
resource
'drbd'
; 0 sec -> wait forever)
To abort waiting enter
'yes'
[ 12]:
yes
######查看监听端口
[root@nod1 ~]
# ss -tanl |grep 7789
LISTEN 0 5 192.168.137.225:7789 *:*
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
|
######在NOD2节点上初始化资源并启动服务
[root@nod2 ~]
# drbdadm create-md drbd
Writing meta data...
initializing activity log
NOT initializing bitmap
lk_bdev_save(
/var/lib/drbd/drbd-minor-0
.lkbd) failed: No such
file
or directory
New drbd meta data block successfully created.
lk_bdev_save(
/var/lib/drbd/drbd-minor-0
.lkbd) failed: No such
file
or directory
######启动服务
[root@nod2 ~]
# service drbd start
Starting DRBD resources: [
create res: drbd
prepare disk: drbd
adjust disk: drbd
adjust net: drbd
]
######查看监听地址与端口
[root@nod2 ~]
# netstat -anput|grep 7789
tcp 0 0 192.168.137.222:42345 192.168.137.225:7789 ESTABLISHED -
tcp 0 0 192.168.137.222:7789 192.168.137.225:42325 ESTABLISHED -
######查看DRBD启动状态
[root@nod2 ~]
# drbd-overview
0:drbd/0 Connected Secondary/Secondary Inconsistent/Inconsistent C r-----
|
1
2
|
[root@nod1 ~]
# drbdadm cstate drbd #drbd为资源名称
Connected
|
1
2
3
4
5
6
7
8
9
10
11
|
[root@nod1 ~]
# drbdadm role drbd
Secondary
/Secondary
[root@nod1 ~]
# cat /proc/drbd
version: 8.4.3 (api:1/proto:86-101)
GIT-hash: 89a294209144b68adb3ee85a73221f964d3ee515 build by gardner@, 2013-05-27 04:30:21
0: cs:Connected ro:Secondary/Secondary ds:Inconsistent/Inconsistent C r-----
ns:0 nr:0 dw:0 dr:0 al:0 bm:0 lo:0 pe:0 ua:0 ap:0 ep:1 wo:f oos:2103412
注释:
Primary 主:资源目前为主,并且可能正在被读取或写入;如果不是双主模式,该角色只会出现在两个节点中的其中一个节点上
Secondary 次:资源目前为次,正常接收对等节点的更新
Unknown 未知:资源角色目前未知,本地的资源不会出现这种状态
|
1
2
|
[root@nod1 ~]
# drbdadm dstate drbd
Inconsistent
/Inconsistent
|
1
2
3
4
5
6
|
######手动启用资源
drbdadm up <resource>
######手动禁用资源
drbdadm down <resource>
注释:
resource:为资源名称;当然也可以使用all表示[停用|启用]所有资源
|
1
2
3
4
5
|
######升级资源
drbdadm primary <resource>
######降级资源
drbdadm secondary <resource>
注释:在单主模式下的DRBD,两个节点同时处于连接状态时,任何一个节点都可以在特定的时间内变成主;但两个节点中只能有一个为主,如果已经有一个主,需先将其降级,另一个节点才能升级;在双主模式下没有这个限制
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
|
[root@nod1 ~]
# drbdadm -- --overwrite-data-of-peer primary drbd
[root@nod1 ~]
# cat /proc/drbd #查看同步进度
version: 8.4.3 (api:1
/proto
:86-101)
GIT-
hash
: 89a294209144b68adb3ee85a73221f964d3ee515 build by gardner@, 2013-05-27 04:30:21
0: cs:SyncSource ro:Primary
/Secondary
ds:UpToDate
/Inconsistent
C r---n-
ns:1897624 nr:0 dw:0 dr:1901216 al:0 bm:115 lo:0 pe:3 ua:3 ap:0 ep:1 wo:f oos:207988
[=================>..]
sync
'ed: 90.3% (207988
/2103412
)K
finish: 0:00:07 speed: 26,792 (27,076) K
/sec
######当同步完成时如以下状态
version: 8.4.3 (api:1
/proto
:86-101)
GIT-
hash
: 89a294209144b68adb3ee85a73221f964d3ee515 build by gardner@, 2013-05-27 04:30:21
0: cs:Connected ro:Primary
/Secondary
ds:UpToDate
/UpToDate
C r-----
ns:2103412 nr:0 dw:0 dr:2104084 al:0 bm:129 lo:0 pe:0 ua:0 ap:0 ep:1 wo:f oos:0
注释: drbd:为资源名称
######查看同步进度也可使用以下命令
drbd-overview
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
|
######格式化文件系统
[root@nod1 ~]
# mkfs.ext4 /dev/drbd0
######挂载文件系统
[root@nod1 ~]
# mount /dev/drbd0 /mnt/
######查看挂载
[root@nod1 ~]
# mount |grep drbd0
/dev/drbd0
on
/mnt
type
ext4 (rw)
注释:"/dev/drbd0"为资源配置文件中已定义的DRBD设备名称
######查看DRBD状态
[root@nod1 ~]
# drbd-overview
0:drbd
/0
Connected Primary
/Secondary
UpToDate
/UpToDate
C r-----
注释:
Primary:当前节点为主;"/"前面的为当前节点的角色,后面的为对端节点的角色
Secondary:备用节点为次
|
1
2
3
4
5
|
[root@nod1 ~]
# mkdir /mnt/test
[root@nod1 ~]
# ls /mnt/
lost+found
test
######在切换主节点时必须保证资源不在使用
[root@nod1 ~]
# umount /mnt/
|
1
2
3
4
5
6
7
8
9
10
|
######先把当前主节点降级为次
[root@nod1 ~]
# drbdadm secondary drbd
######查看DRBD状态
[root@nod1 ~]
# drbd-overview
0:drbd
/0
Connected Secondary
/Secondary
UpToDate
/UpToDate
C r-----
######在NOD2节点升级
[root@nod2 ~]
# drbdadm primary drbd
######查看DRBD状态
[root@nod2 ~]
# drbd-overview
0:drbd
/0
Connected Primary
/Secondary
UpToDate
/UpToDate
C r-----
|
1
2
3
|
[root@nod2 ~]
# mount /dev/drbd0 /mnt/
[root@nod2 ~]
# ls /mnt/
lost+found
test
|
1
2
3
4
5
|
[root@nod2 ~]
# drbd-overview
0:drbd
/0
WFConnection Primary
/Unknown
UpToDate
/DUnknown
C r-----
/mnt
ext4 2.0G 68M 1.9G 4%
[root@nod1 ~]
# drbd-overview
0:drbd
/0
StandAlone Secondary
/Unknown
UpToDate
/DUnknown
r-----
######由上可以看到两个节点已经无法通信;NOD2为主节点,NOD1为备节点
|
1
2
3
4
5
6
|
[root@nod1 ~]
# drbdadm primary drbd
[root@nod1 ~]
# drbd-overview
0:drbd
/0
StandAlone Primary
/Unknown
UpToDate
/DUnknown
r-----
[root@nod1 ~]
# mount /dev/drbd0 /mnt/
[root@nod1 ~]
# mount | grep drbd0
/dev/drbd0
on
/mnt
type
ext4 (rw)
|
1
2
3
4
5
6
7
8
9
10
11
12
13
|
[root@nod2 ~]
# tail -f /var/log/messages
Sep 19 01:56:06 nod2 kernel: d-con drbd: Terminating drbd_a_drbd
Sep 19 01:56:06 nod2 kernel: block drbd0: helper
command
:
/sbin/drbdadm
initial-
split
-brain minor-0
exit
code 0 (0x0)
Sep 19 01:56:06 nod2 kernel: block drbd0: Split-Brain detected but unresolved, dropping connection!
Sep 19 01:56:06 nod2 kernel: block drbd0: helper
command
:
/sbin/drbdadm
split
-brain minor-0
Sep 19 01:56:06 nod2 kernel: block drbd0: helper
command
:
/sbin/drbdadm
split
-brain minor-0
exit
code 0 (0x0)
Sep 19 01:56:06 nod2 kernel: d-con drbd: conn( NetworkFailure -> Disconnecting )
Sep 19 01:56:06 nod2 kernel: d-con drbd: error receiving ReportState, e: -5 l: 0!
Sep 19 01:56:06 nod2 kernel: d-con drbd: Connection closed
Sep 19 01:56:06 nod2 kernel: d-con drbd: conn( Disconnecting -> StandAlone )
Sep 19 01:56:06 nod2 kernel: d-con drbd: receiver terminated
Sep 19 01:56:06 nod2 kernel: d-con drbd: Terminating drbd_r_drbd
Sep 19 01:56:18 nod2 kernel: block drbd0: role( Primary -> Secondary )
|
1
2
3
4
|
[root@nod1 ~]
# drbdadm role drbd
Primary
/Unknown
[root@nod2 ~]
# drbdadm role drbd
Primary
/Unknown
|
1
2
3
4
5
|
[root@nod1 ~]
# drbd-overview
0:drbd
/0
StandAlone Primary
/Unknown
UpToDate
/DUnknown
r-----
/mnt
ext4 2.0G 68M 1.9G 4%
[root@nod2 ~]
# drbd-overview
0:drbd
/0
WFConnection Primary
/Unknown
UpToDate
/DUnknown
C r-----
/mnt
ext4 2.0G 68M 1.9G 4%
######由上可见,状态为StandAlone时,主备节点是不会通信的
|
1
2
3
4
5
6
7
8
9
10
11
12
|
[root@nod1 ~]
# service drbd status
drbd driver loaded OK; device status:
version: 8.4.3 (api:1
/proto
:86-101)
GIT-
hash
: 89a294209144b68adb3ee85a73221f964d3ee515 build by gardner@, 2013-05-27 04:30:21
m:res cs ro ds p mounted fstype
0:drbd StandAlone Primary
/Unknown
UpToDate
/DUnknown
r----- ext4
[root@nod2 ~]
# service drbd status
drbd driver loaded OK; device status:
version: 8.4.3 (api:1
/proto
:86-101)
GIT-
hash
: 89a294209144b68adb3ee85a73221f964d3ee515 build by gardner@, 2013-05-27 04:30:21
m:res cs ro ds p mounted fstype
0:drbd WFConnection Primary
/Unknown
UpToDate
/DUnknown
C
/mnt
ext4
|
1
2
3
4
5
6
7
8
9
10
11
12
13
|
[root@nod1 ~]
# umount /mnt/
[root@nod1 ~]
# drbdadm disconnect drbd
drbd: Failure: (162) Invalid configuration request
additional info from kernel:
unknown connection
Command
'drbdsetup disconnect ipv4:192.168.137.225:7789 ipv4:192.168.137.222:7789'
terminated with
exit
code 10
[root@nod1 ~]
# drbdadm secondary drbd
[root@nod1 ~]
# drbd-overview
0:drbd
/0
StandAlone Secondary
/Unknown
UpToDate
/DUnknown
r-----
[root@nod1 ~]
# drbdadm connect --discard-my-data drbd
######执行完以上三步后,查看状态会发现节点仍未恢复连接(处于WFConnection状态),还需要在NOD2节点上执行连接操作
[root@nod1 ~]
# drbd-overview
0:drbd
/0
WFConnection Secondary
/Unknown
UpToDate
/DUnknown
C r-----
|
1
2
3
4
5
6
7
|
[root@nod2 ~]
# drbdadm connect drbd
######查看节点连接状态
[root@nod2 ~]
# drbd-overview
0:drbd
/0
Connected Primary
/Secondary
UpToDate
/UpToDate
C r-----
/mnt
ext4 2.0G 68M 1.9G 4%
[root@nod1 ~]
# drbd-overview
0:drbd
/0
Connected Secondary
/Primary
UpToDate
/UpToDate
C r-----
######由上可见已经恢复到正常运行状态
|