重建CRS实验记录
1、收集现有信息
HOSTS信息
[root@racdb1 ~]# more /etc/hosts
# Do not remove the following line, or various programs
# that require network functionality will fail.
127.0.0.1 localhost.localdomain localhost
::1 localhost6.localdomain6 localhost6
# My Old Oracle RAC
#192.168.9.1 racdb1
#192.168.9.2 racdb2
#192.168.27.1 racdb1-priv
#192.168.27.2 racdb2-priv
#192.168.9.11 racdb1-vip
#192.168.9.12 racdb2-vip
# My New Oracle RAC
192.168.19.1 racdb1
192.168.19.2 racdb2
192.168.127.1 racdb1-priv
192.168.127.2 racdb2-priv
192.168.19.11 racdb1-vip
192.168.19.12 racdb2-vip
各节点IP及网关情况
[root@racdb1 ~]# cat /etc/sysconfig/network-scripts/ifcfg-eth0
# Advanced Micro Devices [AMD] 79c970 [PCnet32 LANCE]
DEVICE=eth0
BOOTPROTO=none
BROADCAST=192.168.19.255
HWADDR=00:0C:29:DA:1C:48
IPADDR=192.168.19.1
NETMASK=255.255.255.0
NETWORK=192.168.19.0
ONBOOT=yes
GATEWAY=192.168.19.27
TYPE=Ethernet
USERCTL=no
IPV6INIT=no
PEERDNS=yes
[root@racdb1 ~]# cat /etc/sysconfig/network-scripts/ifcfg-eth1
# Advanced Micro Devices [AMD] 79c970 [PCnet32 LANCE]
DEVICE=eth1
BOOTPROTO=none
BROADCAST=192.168.127.255
HWADDR=00:0C:29:DA:1C:52
IPADDR=192.168.127.1
NETMASK=255.255.255.0
NETWORK=192.168.127.0
ONBOOT=yes
TYPE=Ethernet
USERCTL=no
IPV6INIT=no
PEERDNS=yes
[root@racdb2 ~]# cat /etc/sysconfig/network-scripts/ifcfg-eth0
# Advanced Micro Devices [AMD] 79c970 [PCnet32 LANCE]
DEVICE=eth0
BOOTPROTO=none
ONBOOT=yes
HWADDR=00:0c:29:74:40:28
NETMASK=255.255.255.0
IPADDR=192.168.19.2
GATEWAY=192.168.19.27
TYPE=Ethernet
USERCTL=no
IPV6INIT=no
PEERDNS=yes
[root@racdb2 ~]# cat /etc/sysconfig/network-scripts/ifcfg-eth1
# Advanced Micro Devices [AMD] 79c970 [PCnet32 LANCE]
DEVICE=eth1
BOOTPROTO=none
ONBOOT=yes
HWADDR=00:0c:29:74:40:32
TYPE=Ethernet
NETMASK=255.255.255.0
IPADDR=192.168.127.2
USERCTL=no
IPV6INIT=no
PEERDNS=yes
CRS收集的信息
[root@racdb1 ~]# crs_stat -t
Name Type Target State Host
------------------------------------------------------------
ora.racdb.db application ONLINE ONLINE racdb1
ora....b1.inst application ONLINE ONLINE racdb1
ora....b2.inst application ONLINE ONLINE racdb2
ora...._taf.cs application ONLINE ONLINE racdb1
ora....db1.srv application ONLINE ONLINE racdb1
ora....db2.srv application ONLINE ONLINE racdb2
ora....SM1.asm application ONLINE ONLINE racdb1
ora....B1.lsnr application ONLINE ONLINE racdb1
ora.racdb1.gsd application ONLINE ONLINE racdb1
ora.racdb1.ons application ONLINE ONLINE racdb1
ora.racdb1.vip application ONLINE ONLINE racdb1
ora....SM2.asm application ONLINE ONLINE racdb2
ora....B2.lsnr application ONLINE ONLINE racdb2
ora.racdb2.gsd application ONLINE ONLINE racdb2
ora.racdb2.ons application ONLINE ONLINE racdb2
ora.racdb2.vip application ONLINE ONLINE racdb2
[root@racdb1 ~]# crs_stat -p | grep racdb.db
NAME=ora.racdb.db
[root@racdb1 ~]# crs_stat -p | grep inst
NAME=ora.racdb.racdb1.inst
NAME=ora.racdb.racdb2.inst
DESCRIPTION=CRS application for ASM instance
DESCRIPTION=CRS application for ASM instance
[root@racdb1 ~]# crs_stat -p | grep srv
NAME=ora.racdb.srv_taf.cs
NAME=ora.racdb.srv_taf.racdb1.srv
NAME=ora.racdb.srv_taf.racdb2.srv
[root@racdb1 ~]# crs_stat -p | grep asm
NAME=ora.racdb1.ASM1.asm
NAME=ora.racdb2.ASM2.asm
[root@racdb1 ~]# crs_stat -p | grep gsd
NAME=ora.racdb1.gsd
NAME=ora.racdb2.gsd
[root@racdb1 ~]# crs_stat -p | grep ons
NAME=ora.racdb1.ons
NAME=ora.racdb2.ons
[root@racdb1 ~]# crs_stat -p | grep vip
REQUIRED_RESOURCES=ora.racdb1.vip
NAME=ora.racdb1.vip
REQUIRED_RESOURCES=ora.racdb2.vip
NAME=ora.racdb2.vip
[root@racdb1 ~]# crs_stat -p | grep lsnr
NAME=ora.racdb1.LISTENER_RACDB1.lsnr
NAME=ora.racdb2.LISTENER_RACDB2.lsnr
节点信息
[root@racdb1 ~]# olsnodes -n -p -i
racdb1 1 racdb1-priv racdb1-vip
racdb2 2 racdb2-priv racdb2-vip
网卡信息
[root@racdb1 ~]# oifcfg getif
eth0 192.168.19.0 global public
eth1 192.168.127.0 global cluster_interconnect
Votedisk磁盘
[root@racdb1 ~]# crsctl query css votedisk
0. 0 /dev/raw/raw3
1. 0 /dev/raw/raw4
2. 0 /dev/raw/raw5
OCR磁盘
[root@racdb1 ~]# ocrcheck
Status of Oracle Cluster Registry is as follows :
Version : 2
Total space (kbytes) : 524024
Used space (kbytes) : 4584
Available space (kbytes) : 519440
ID : 861718794
Device/File Name : /dev/raw/raw1
Device/File integrity check succeeded
Device/File Name : /dev/raw/raw2
Device/File integrity check succeeded
Cluster registry integrity check succeeded
Srvctl收集的信息
[root@racdb1 ~]# srvctl config database -d racdb -a
racdb1 racdb1 /oracle/app/oracle/product/10.2.0/db_1
racdb2 racdb2 /oracle/app/oracle/product/10.2.0/db_1
DB_NAME: null
ORACLE_HOME: /oracle/app/oracle/product/10.2.0/db_1
SPFILE: null
DOMAIN: null
DB_ROLE: null
START_OPTIONS: null
POLICY: AUTOMATIC
ENABLE FLAG: DB ENABLED
[root@racdb2 ~]# srvctl config nodeapps -n racdb1 -a
VIP exists.: /racdb1-vip/192.168.19.11/255.255.255.0/eth0
[root@racdb2 ~]# srvctl config nodeapps -n racdb2 -a
VIP exists.: /racdb2-vip/192.168.19.12/255.255.255.0/eth0
2、修改规划
IP、VIP和Pri-IP信息
网关:192.168.9.27
# My Old Oracle RAC
192.168.9.1 rac1
192.168.9.2 rac2
192.168.27.1 rac1-priv
192.168.27.2 rac2-priv
192.168.9.11 rac1-vip
192.168.9.12 rac2-vip
# My New Oracle RAC
#192.168.19.1 racdb1
#192.168.19.2 racdb2
#192.168.127.1 racdb1-priv
#192.168.127.2 racdb2-priv
#192.168.19.11 racdb1-vip
#192.168.19.12 racdb2-vip
修改OCR和Vote Disk
OCR磁盘:
/dev/raw/raw1
/dev/raw/raw9
Vote磁盘:
/dev/raw/raw3
/dev/raw/raw4
/dev/raw/raw10
3、开始重建
1)、停止两个节点上所有资源并禁用CRS;
2)、修改IP和主机名,并修改hosts文件内容,备份各个节点的listener.ora和tnsnames.ora文件,最后重启各个节点主机;
3)、检查IP、主机名是否正确,各节点之间能否互通、能否ping通网关,以及ssh是否仍然正常
重新配置ssh
在两个节点分别执行:
[root@rac1 ~]# cd /
[root@rac1 /]# find | grep .ssh
……
./home/oracle/.ssh
……
[root@rac1 ~]# su - oracle
[oracle@rac1 ~]$ cd /home/oracle/.ssh
[oracle@rac1 .ssh]$ ll
total 24
-rw------- 1 oracle oinstall 1996 Feb 25 10:52 authorized_keys
-rw------- 1 oracle oinstall 668 Feb 25 10:49 id_dsa
-rw-r--r-- 1 oracle oinstall 603 Feb 25 10:49 id_dsa.pub
-rw------- 1 oracle oinstall 1679 Feb 25 10:49 id_rsa
-rw-r--r-- 1 oracle oinstall 395 Feb 25 10:49 id_rsa.pub
-rw-r--r-- 1 oracle oinstall 1588 Mar 4 08:20 known_hosts
[oracle@rac1 .ssh]$ rm *.*
[oracle@rac1 .ssh]$ ll
total 16
-rw------- 1 oracle oinstall 1996 Feb 25 10:52 authorized_keys
-rw------- 1 oracle oinstall 668 Feb 25 10:49 id_dsa
-rw------- 1 oracle oinstall 1679 Feb 25 10:49 id_rsa
-rw-r--r-- 1 oracle oinstall 1588 Mar 4 08:20 known_hosts
[oracle@rac1 .ssh]$ rm au*
[oracle@rac1 .ssh]$ ll
total 12
-rw------- 1 oracle oinstall 668 Feb 25 10:49 id_dsa
-rw------- 1 oracle oinstall 1679 Feb 25 10:49 id_rsa
-rw-r--r-- 1 oracle oinstall 1588 Mar 4 08:20 known_hosts
[oracle@rac1 .ssh]$ rm id*
[oracle@rac1 .ssh]$ ll
total 4
-rw-r--r-- 1 oracle oinstall 1588 Mar 4 08:20 known_hosts
[oracle@rac1 .ssh]$ rm kn*
[oracle@rac1 .ssh]$ ll
total 0
[oracle@rac1 .ssh]$ cd ..
[oracle@rac1 ~]$ rmdir .ssh
在每个节点依次执行:
$ mkdir ~/.ssh
$ chmod 700 ~/.ssh
$ /usr/bin/ssh-keygen -t rsa
$ /usr/bin/ssh-keygen -t dsa
$ touch ~/.ssh/authorized_keys
$ cd ~/.ssh
$ ssh rac1 cat /home/oracle/.ssh/id_rsa.pub >> authorized_keys
$ ssh rac1 cat /home/oracle/.ssh/id_dsa.pub >> authorized_keys
$ ssh rac2 cat /home/oracle/.ssh/id_rsa.pub >> authorized_keys
$ ssh rac2 cat /home/oracle/.ssh/id_dsa.pub >> authorized_keys
#rac1节点上执行(将合并后的authorized_keys复制到对端节点rac2)
$ scp authorized_keys rac2:/home/oracle/.ssh/
#rac2节点上执行(将合并后的authorized_keys复制到对端节点rac1)
$ scp authorized_keys rac1:/home/oracle/.ssh/
$ chmod 600 ~/.ssh/authorized_keys
$ exec /usr/bin/ssh-agent $SHELL
$ /usr/bin/ssh-add
$ ssh rac1 date
$ ssh rac2 date
4)、准备新添加的OCR磁盘和Vote磁盘并为它们授予相应访问权限
5)、破坏OCR,Voting磁盘
在两边用root执行:
/oracle/app/crs/install/rootdelete.sh
Shutting down Oracle Cluster Ready Services (CRS):
/etc/init.d/init.cssd: line 875: /etc/oracle/scls_scr/rac1/root/cssrun: No such file or directory
/etc/init.d/init.crsd: line 134: /etc/oracle/scls_scr/rac1/root/crsdboot: No such file or directory
/etc/init.d/init.cssd: line 875: /etc/oracle/scls_scr/rac1/root/cssrun: No such file or directory
Jun 24 11:22:11.113 | ERR | failed to connect to daemon, errno(111)
/etc/init.d/init.cssd: line 1332: /etc/oracle/scls_scr/rac1/root/cssfboot: No such file or directory
/bin/touch: cannot touch `/etc/oracle/scls_scr/rac1/root/nooprocd': No such file or directory
/bin/touch: cannot touch `/etc/oracle/scls_scr/rac1/root/noclsvmon': No such file or directory
/bin/touch: cannot touch `/etc/oracle/scls_scr/rac1/root/noclsomon': No such file or directory
clsz init failed while trying to stop resources.
Possible cause: CRSD is down.
Shutdown has begun. The daemons should exit soon.
Checking to see if Oracle CRS stack is down...
Oracle CRS stack is not running.
Oracle CRS stack is down now.
Removing script for Oracle Cluster Ready services
Updating ocr file for downgrade
Cleaning up SCR settings in '/etc/oracle/scls_scr'
注:以上脚本的作用包含了清除/etc/oracle/scls_scr文件夹的操作
/oracle/app/crs/install/rootdeinstall.sh
Removing contents from OCR mirror device
2560+0 records in
2560+0 records out
10485760 bytes (10 MB) copied, 1.52677 seconds, 6.9 MB/s
Removing contents from OCR device
2560+0 records in
2560+0 records out
10485760 bytes (10 MB) copied, 1.66152 seconds, 6.3 MB/s
在其中一个节点上执行清除OCR磁盘和Vote磁盘的工作:
[root@rac1 install]# dd if=/dev/zero of=/dev/raw/raw1 bs=1024000 count=120
120+0 records in
120+0 records out
122880000 bytes (123 MB) copied, 3.36957 seconds, 36.5 MB/s
[root@rac1 install]# dd if=/dev/zero of=/dev/raw/raw2 bs=1024000 count=120
120+0 records in
120+0 records out
122880000 bytes (123 MB) copied, 3.24333 seconds, 37.9 MB/s
[root@rac1 install]# dd if=/dev/zero of=/dev/raw/raw3 bs=1024000 count=120
120+0 records in
120+0 records out
122880000 bytes (123 MB) copied, 3.30341 seconds, 37.2 MB/s
[root@rac1 install]# dd if=/dev/zero of=/dev/raw/raw4 bs=1024000 count=120
120+0 records in
120+0 records out
122880000 bytes (123 MB) copied, 3.30384 seconds, 37.2 MB/s
[root@rac1 install]# dd if=/dev/zero of=/dev/raw/raw5 bs=1024000 count=120
120+0 records in
120+0 records out
122880000 bytes (123 MB) copied, 3.29728 seconds, 37.3 MB/s
6)、重建CRS前的重新配置工作
在两个节点上修改/etc/oracle/ocr.loc文件,将文件中的ocrconfig_loc=后的地址改为新的存储ocr的裸设备名(如果用的是集群文件系统,则为文件名)。
但事实是我在/etc/oracle/下根本没有ocr.loc文件了?!先暂时跳过这一步,继续后发现在调用root.sh脚本时,会根据rootconfig里的参数重新生成该文件,因此,个人觉得此时在这里删除该文件也可以!
修改$CRS_HOME/install/paramfile.crs和$CRS_HOME/install/rootconfig文件中的相关参数。
7)、运行脚本重建CRS
在每个节点依次执行(root 用户)root.sh($CRS_HOME/root.sh)脚本:
完成后可以看到有一些资源已经注册到CRS上了
[root@rac1 ~]# crs_stat -t
Name Type Target State Host
------------------------------------------------------------
ora.rac1.gsd application ONLINE ONLINE rac1
ora.rac1.ons application ONLINE ONLINE rac1
ora.rac1.vip application ONLINE ONLINE rac1
ora.rac2.gsd application ONLINE ONLINE rac2
ora.rac2.ons application ONLINE ONLINE rac2
ora.rac2.vip application ONLINE ONLINE rac2
8)、使用oifcfg配置db使用的公共(public)网络和私有互连(cluster_interconnect)网络(Oracle)
[root@rac1 ~]# oifcfg getif
[root@rac1 ~]# oifcfg setif -global eth0/192.168.9.0:public
[root@rac1 ~]# oifcfg setif -global eth1/192.168.27.0:cluster_interconnect
[root@rac1 ~]# oifcfg getif
eth0 192.168.9.0 global public
eth1 192.168.27.0 global cluster_interconnect
9)、注册其他资源到集群(oracle用户)
①注册监听到集群
使用图形化的netca命令,先删除旧的监听再添加新的监听
②注册ASM实例到集群(如果使用ASM)
[oracle@rac1 ~]$ srvctl add asm -n rac1 -i +ASM1 -o $ORACLE_HOME
[oracle@rac1 ~]$ srvctl add asm -n rac2 -i +ASM2 -o $ORACLE_HOME
[oracle@rac1 ~]$ srvctl start asm -n rac1
[oracle@rac1 ~]$ srvctl start asm -n rac2
③注册instance/database到集群
[oracle@rac1 ~]$ srvctl add database -d racdb -o $ORACLE_HOME
[oracle@rac1 ~]$ srvctl add instance -d racdb -i racdb1 -n rac1
[oracle@rac1 ~]$ srvctl add instance -d racdb -i racdb2 -n rac2
[oracle@rac1 ~]$ srvctl config database -d racdb
[oracle@rac1 ~]$ srvctl start database -d racdb
在以上最后一个步骤启动数据库时报错,原因是修改主机名后,监听器配置中的相关项与原来不同,因此需要对监听器配置作出相应的修改。
④注册services到集群
[oracle@rac1 admin]$ srvctl add service -d racdb -s srv_taf -r racdb1,racdb2 -P BASIC
[oracle@rac1 admin]$ srvctl enable service -d racdb -s srv_taf -i racdb1
[oracle@rac1 admin]$ srvctl enable service -d racdb -s srv_taf -i racdb2
[oracle@rac1 admin]$ srvctl start service -d racdb -s srv_taf
[oracle@rac1 admin]$ sqlplus / as sysdba
SQL> begin
  2  dbms_service.modify_service(
  3  service_name=>'srv_taf',
  4  aq_ha_notifications=>true,
  5  failover_method=>dbms_service.failover_method_basic,
  6  failover_type=>dbms_service.failover_type_select,
  7  failover_retries=>280,
  8  failover_delay=>1,
  9  clb_goal=>dbms_service.clb_goal_long);
 10  end;
 11  /
PL/SQL procedure successfully completed.
OVER!
来自“ITPUB博客”,链接:http://blog.itpub.net/14338195/viewspace-1051962/ ,如需转载,请注明出处,否则将追究法律责任。
转载于:http://blog.itpub.net/14338195/viewspace-1051962/