昨天在某银行实施oracle 11.2.0.3.7 RAC,rhel5.8 x64平台。所有事情做得可谓超乎想象的顺利,结果一重启服务器,完蛋了!
服务器重启了,进入操作系统后,ps -ef |grep ora就返回一行,同样ps -ef | grep asm也是一行。
RAC架构压根没起来,更别说数据库了。
crsctl check crs看到的结果是,只有ohasd(Oracle High Availability Services)是online。懵了一下,第一反应是,存储出问题了。
之前一直担心p2000多路径问题。
网上说这个方法可以解决问题:
blacklist {
devnode "^(dm-)[0-9]*"
devnode "^hd[a-z]"
}
defaults {
path_grouping_policy multibus
failback immediate
no_path_retry fail
user_friendly_names yes
}
写在哪?亲,肯定清楚:/etc/multipath.conf。
重启服务,得到的结果如下:
[root@rac1 ~]# /sbin/multipath -ll
mpath2 (3600c0ff0001504939576145201000000) dm-1 HP,P2000 G3 FC
[size=11T][features=1 queue_if_no_path][hwhandler=0][rw]
\_ round-robin 0 [prio=100][active]
\_ 3:0:0:0 sda 8:0 [active][ready]
\_ 4:0:0:0 sdl 8:176 [active][ready]
mpath1 (3600508b1001c6ea8bce433987f22757f) dm-0 HP,LOGICAL VOLUME
[size=559G][features=1 queue_if_no_path][hwhandler=0][rw]
\_ round-robin 0 [prio=1][active]
\_ #:#:#:# cciss!c0d1 104:16 [active][ready]
mpath7 (3600c0ff000150d9e20eb155201000000) dm-6 HP,P2000 G3 FC
[size=1.5T][features=1 queue_if_no_path][hwhandler=0][rw]
\_ round-robin 0 [prio=100][enabled]
\_ 3:0:1:4 sdf 8:80 [active][ready]
\_ 4:0:2:4 sdv 65:80 [active][ready]
\_ round-robin 0 [prio=20][enabled]
\_ 3:0:2:4 sdk 8:160 [active][ready]
\_ 4:0:1:4 sdq 65:0 [active][ready]
mpath6 (3600c0ff000150d9ef1ea155201000000) dm-5 HP,P2000 G3 FC
[size=186G][features=1 queue_if_no_path][hwhandler=0][rw]
\_ round-robin 0 [prio=100][enabled]
\_ 3:0:1:3 sde 8:64 [active][ready]
\_ 4:0:2:3 sdu 65:64 [active][ready]
\_ round-robin 0 [prio=20][enabled]
\_ 3:0:2:3 sdj 8:144 [active][ready]
\_ 4:0:1:3 sdp 8:240 [active][ready]
mpath5 (3600c0ff000150d9ed0ea155201000000) dm-4 HP,P2000 G3 FC
[size=954M][features=1 queue_if_no_path][hwhandler=0][rw]
\_ round-robin 0 [prio=100][enabled]
\_ 3:0:1:2 sdd 8:48 [active][ready]
\_ 4:0:2:2 sdt 65:48 [active][ready]
\_ round-robin 0 [prio=20][enabled]
\_ 3:0:2:2 sdi 8:128 [active][ready]
\_ 4:0:1:2 sdo 8:224 [active][ready]
mpath4 (3600c0ff000150d9eb1ea155201000000) dm-3 HP,P2000 G3 FC
[size=954M][features=1 queue_if_no_path][hwhandler=0][rw]
\_ round-robin 0 [prio=100][enabled]
\_ 3:0:1:1 sdc 8:32 [active][ready]
\_ 4:0:2:1 sds 65:32 [active][ready]
\_ round-robin 0 [prio=20][enabled]
\_ 3:0:2:1 sdh 8:112 [active][ready]
\_ 4:0:1:1 sdn 8:208 [active][ready]
mpath3 (3600c0ff000150d9e4dea155201000000) dm-2 HP,P2000 G3 FC
[size=954M][features=1 queue_if_no_path][hwhandler=0][rw]
\_ round-robin 0 [prio=100][enabled]
\_ 3:0:1:0 sdb 8:16 [active][ready]
\_ 4:0:2:0 sdr 65:16 [active][ready]
\_ round-robin 0 [prio=20][enabled]
\_ 3:0:2:0 sdg 8:96 [active][ready]
\_ 4:0:1:0 sdm 8:192 [active][ready]
这不是我想要的:user_friendly_names分配的mpathN名称取决于设备被发现的顺序,我担心两个节点中某一个因为插了一个U盘,导致两边的设备名不一致。(教训,已经发生过了)
后来想个办法解决,其实也是老办法:
blacklist {
# wwid 26353900f02796769
devnode "^(ram|raw|loop|fd|md|dm-|sr|scd|st)[0-9]*"
devnode "^hd[a-z][[0-9]*]"
# devnode "^cciss!c[0-9]d[0-9]*"
# device {
# vendor "HP"
# product "OPEN-*"
# }
}
defaults {
udev_dir /dev
polling_interval 10
selector "round-robin 0"
path_grouping_policy failover
getuid_callout "/sbin/scsi_id -g -u -s /block/%n"
prio_callout "/bin/true"
path_checker tur
rr_min_io 100
rr_weight uniform
failback immediate
no_path_retry 12
user_friendly_names yes
}
multipaths {
multipath {
wwid 3600c0ff000150d9e288f245201000000
alias mapper0
path_grouping_policy group_by_prio
path_selector "round-robin 0"
prio_callout "/sbin/mpath_prio_alua /dev/%n"
rr_weight uniform
failback immediate
no_path_retry 18
rr_min_io 100
}
multipath {
wwid 3600c0ff000150d9eaa8e245201000000
alias mapper1
path_grouping_policy group_by_prio
path_selector "round-robin 0"
prio_callout "/sbin/mpath_prio_alua /dev/%n"
rr_weight uniform
failback immediate
no_path_retry 18
rr_min_io 100
}
multipath {
wwid 3600c0ff000150d9e428f245201000000
alias mapper2
path_grouping_policy group_by_prio
path_selector "round-robin 0"
prio_callout "/sbin/mpath_prio_alua /dev/%n"
rr_weight uniform
failback immediate
no_path_retry 18
rr_min_io 100
}
multipath {
wwid 3600c0ff000150d9e438c245201000000
alias mapper3
path_grouping_policy group_by_prio
path_selector "round-robin 0"
prio_callout "/sbin/mpath_prio_alua /dev/%n"
rr_weight uniform
failback immediate
no_path_retry 18
rr_min_io 100
}
multipath {
wwid 3600c0ff000150d9e068c245201000000
alias mapper4
path_grouping_policy group_by_prio
path_selector "round-robin 0"
prio_callout "/sbin/mpath_prio_alua /dev/%n"
rr_weight uniform
failback immediate
no_path_retry 18
rr_min_io 100
}
multipath {
wwid 3600c0ff0001504939576145201000000
alias mapper5
path_grouping_policy group_by_prio
path_selector "round-robin 0"
prio_callout "/sbin/mpath_prio_alua /dev/%n"
rr_weight uniform
failback immediate
no_path_retry 18
rr_min_io 100
}
multipath {
wwid 3600508b1001c6ea8bce433987f22757f
alias mapper6
path_grouping_policy group_by_prio
path_selector "round-robin 0"
prio_callout "/sbin/mpath_prio_alua /dev/%n"
rr_weight uniform
failback immediate
no_path_retry 18
rr_min_io 100
}
}
记住:该文件前面的东西都是“废物”,可以删除或者注释掉。
输出的结果是我想要的:/dev/mapper/mapper[0-5]
但是,尽管如此还是发生了开头说的问题,查看GI的告警日志,看到如下信息:
1087203648]ERROR: -15(asmlib ASM:/opt/oracle/extapi/64/asm/orcl/1/libasm.so op asm_open error Operation not permitted
迷茫了一下:ASMLib出问题了?
将asm相关的命令敲一遍,没问题!
搜下:multipath问题,大爷的!
在某个文件(亲肯定知道)作如下修改:
cat /etc/sysconfig/oracleasm-_dev_oracleasm
#
# This is a configuration file for automatic loading of the Oracle
# Automatic Storage Management library kernel driver. It is generated
# By running /etc/init.d/oracleasm configure. Please use that method
# to modify this file
#
# ORACLEASM_ENABLED: 'true' means to load the driver on boot.
ORACLEASM_ENABLED=true
# ORACLEASM_UID: Default user owning the /dev/oracleasm mount point.
ORACLEASM_UID=grid
# ORACLEASM_GID: Default group owning the /dev/oracleasm mount point.
ORACLEASM_GID=asmadmin
# ORACLEASM_SCANBOOT: 'true' means scan for ASM disks on boot.
ORACLEASM_SCANBOOT=true
# ORACLEASM_SCANORDER: Matching patterns to order disk scanning
ORACLEASM_SCANORDER="dm"
# ORACLEASM_SCANEXCLUDE: Matching patterns to exclude disks from scan
ORACLEASM_SCANEXCLUDE="sd"
# ORACLEASM_USE_LOGICAL_BLOCK_SIZE: 'true' means use the logical block size
# reported by the underlying disk instead of the physical. The default
# is 'false'
ORACLEASM_USE_LOGICAL_BLOCK_SIZE=false
我们的RHEL 5.8环境使用了Multipath对磁盘做多路径处理,因此ASMLib扫描磁盘时需要优先扫描dm开头的多路径设备(ORACLEASM_SCANORDER="dm"),并排除sd开头的单路径设备(ORACLEASM_SCANEXCLUDE="sd")。
重启服务器,问题解决!