一、准备工作
1. 安装操作系统Red Hat Enterprise Linux 6.5,略。
关闭防火墙,selinux,NetworkManager
iptables -F
service iptables save
service iptables stop
chkconfig iptables off
service NetworkManager stop
chkconfig NetworkManager off
sed -i '/SELINUX/s/enforcing/disabled/g' /etc/selinux/config
2. 配置ip和主机名
序号 | IP | 用途 | 备注 |
1 | 172.25.254.120 | public1 | |
2 | 172.25.254.121 | public1-vip1 | |
3 | 10.10.10.1 | private1 | |
4 | 172.25.254.130 | Public2 | |
5 | 172.25.254.131 | public2-vip2 | |
6 | 10.10.10.2 | private2 | |
7 | 172.25.254.125 | scanip |
3. /etc/hosts
vi /etc/hosts
172.25.254.120 public1
172.25.254.121 public1-vip1
10.10.10.1 private1
172.25.254.130 public2
172.25.254.131 public2-vip2
10.10.10.2 private2
172.25.254.125 scanip
cat /etc/sysconfig/network
NETWORKING=yes
HOSTNAME=public1
4. yum搭建
将ISO光盘内部文件拷贝到/var/www/html下
cd /etc/yum.repos.d/
vim ab.repo
[a]
name=1
baseurl=http://172.25.254.120
enabled=1
gpgcheck=0
[b]
name=b
baseurl=http://172.25.254.120/ResilientStorage
enabled=1
gpgcheck=0
[c]
name=c
baseurl=http://172.25.254.120/HighAvailability
enabled=1
gpgcheck=0
[d]
name=d
baseurl=http://172.25.254.120/ScalableFileSystem
enabled=1
gpgcheck=0
[e]
name=e
baseurl=http://172.25.254.120/LoadBalancer
enabled=1
gpgcheck=0
5. 安装rpm
yum -y update
reboot
yum -y install binutils* compat-libstdc* elfutils-libelf* gcc* glibc* libaio* libgcc* libstdc* make* sysstat* unixODBC* pdksh-5.2.14-37.el5_8.1.x86_64.rpm (此包不在系统镜像内,需另行下载,为Oracle安装所依赖)
5. 修改内核参数
分别在两节点上执行:
vi /etc/sysctl.conf
末尾添加:
kernel.shmmni = 4096
kernel.sem = 250 32000 100 128
net.ipv4.ip_local_port_range = 9000 65500
net.core.rmem_default = 262144
net.core.rmem_max = 4194304
net.core.wmem_default = 262144
net.core.wmem_max = 1048576
fs.aio-max-nr = 1048576
fs.file-max = 6815744
重新加载生效:sysctl -p
[root@db1 ~]#
6. 修改两节点的oracle用户限制
vi /etc/security/limits.conf 末尾添加:
oracle soft nofile 2047
oracle hard nofile 65536
oracle soft nproc 2047
oracle hard nproc 16384
oracle soft stack 10240
oracle hard stack 32768
grid soft nofile 2047
grid hard nofile 65536
grid soft nproc 2047
grid hard nproc 16384
grid soft stack 10240
grid hard stack 32768
7. 修改两节点的/etc/pam.d/login
vi /etc/pam.d/login 末尾添加:
session required pam_limits.so
session required /lib64/security/pam_limits.so
8. 时间服务器配置
不使用系统ntp同步服务,使用oracle自身的ctss同步时间
停止ntp服务
[root@db1 ~]# service ntpd stop
将ntp配置文件重命名
[root@db1 ~]# mv /etc/ntp.conf /etc/ntp.conf.bak
如使用ntp服务,按如下配置:
[root@db1 ~]# yum install ntp
[root@db1 ~]# vi /etc/ntp.conf
添加一行server 21.12.145.44
[root@db1 ~]# vi /etc/sysconfig/ntpd
OPTIONS="-x -u ntp:ntp -p /var/run/ntpd.pid"
[root@db1 ~]# service ntpd restart
[root@db1 ~]# chkconfig ntpd on
9. 更改安装所有者的 ulimit 设置
两节点都要执行:
vi /etc/profile 末尾添加:
if [ $USER = "oracle" ] || [ $USER = "grid" ]; then
if [ $SHELL = "/bin/ksh" ]; then
ulimit -p 16384
ulimit -n 65536
else
ulimit -u 16384 -n 65536
fi
umask 022
fi
10. 两节点建立必要的组和用户
[root@db1 ~]#
groupadd -g 501 oinstall
groupadd -g 502 dba
groupadd -g 503 oper
groupadd -g 504 asmadmin
groupadd -g 505 asmdba
groupadd -g 506 asmoper
useradd -u 501 -g oinstall -G dba,oper,asmdba oracle
useradd -u 502 -g oinstall -G asmadmin,asmoper,asmdba grid
echo oracle |passwd --stdin oracle
echo oracle |passwd --stdin grid
密码都设置为oracle
11. 两节点建立安装目录
mkdir -p /u01/app/grid
mkdir -p /u01/app/11.2.0.3/grid
mkdir -p /u01/app/oracle
chown -R grid:oinstall /u01/app/grid
chown -R grid:oinstall /u01/app/11.2.0.3
chown -R oracle:oinstall /u01/app/oracle
chown grid:oinstall /u01/app
chmod -R 775 /u01
12. 两节点设置oracle和grid用户的环境变量
su - oracle
vi .bash_profile
export ORACLE_SID=ats1
export ORACLE_UNQNAME=ats
export ORACLE_BASE=/u01/app/oracle
export ORACLE_HOME=$ORACLE_BASE/product/11.2.0.3/db_1
export PATH=$ORACLE_HOME/bin:$PATH
export LD_LIBRARY_PATH=$ORACLE_HOME/lib:/lib:/usr/lib
export NLS_LANG=AMERICAN_AMERICA.UTF8
export TEMP=/tmp
export TMPDIR=/tmp
umask 022
注:带编号的值按节点区分,节点1上为 ats1/+ASM1,节点2上为 ats2/+ASM2
su - grid
vi .bash_profile
export ORACLE_SID=+ASM1
export ORACLE_BASE=/u01/app/grid
export ORACLE_HOME=/u01/app/11.2.0.3/grid
export PATH=$ORACLE_HOME/bin:$PATH:/usr/local/bin/:.
export LD_LIBRARY_PATH=$ORACLE_HOME/lib:/lib:/usr/lib
export NLS_LANG=AMERICAN_AMERICA.UTF8
export TEMP=/tmp
export TMP=/tmp
export TMPDIR=/tmp
umask 022
13. asm磁盘有两种方式
此处只做udev方式
将磁盘划分为几个大小相同的分区 sdb1、sdb2、sdb3(做仲裁卷组)和几个空间较大的分区 sdb5、sdb6
vim /etc/udev/rules.d/60-raw.rules
ACTION=="add", KERNEL=="sdb1", RUN+="/bin/raw /dev/raw/raw1 %N"
ACTION=="add", KERNEL=="sdb2", RUN+="/bin/raw /dev/raw/raw2 %N"
ACTION=="add", KERNEL=="sdb3", RUN+="/bin/raw /dev/raw/raw3 %N"
ACTION=="add", KERNEL=="sdb5", RUN+="/bin/raw /dev/raw/raw5 %N"
ACTION=="add", KERNEL=="sdb6", RUN+="/bin/raw /dev/raw/raw6 %N"
KERNEL=="raw[1-6]",OWNER="grid",GROUP="asmadmin",MODE="0660"
start_udev
cd /lib64
ln -s libcap.so.2.16 libcap.so.1
二、安装Grid Infrastructure
unzip p13390677_112040_Linux-x86-64_3of7.zip
unzip p13390677_112040_Linux-x86-64_1of7.zip
unzip p13390677_112040_Linux-x86-64_2of7.zip
1. 上传Grid Infrastructure和oracle11g安装文件
在解压后的./grid/rpm目录下找到cvuqdisk-*.rpm,执行如下
[root@db1 ~]# CVUQDISK_GRP=oinstall; export CVUQDISK_GRP
[root@db1 ~]# rpm -ivh cvuqdisk-1.0.9-1.rpm
2. 授予以上两个安装程序可执行权限
chown -R grid:oinstall ./grid
chown -R oracle:oinstall ./database
chmod -R 775 ./
3. 安装Grid Infrastructure
[grid@rac1 grid]$ ./runInstaller
root脚本两个节点都要执行,且第一个脚本在两个节点都执行完后,再执行第二个脚本
4. 部分集群命令
用root调用普通用户下的变量方法
先 su - 用户(加载该用户的环境变量),再执行 su root(不带 -)即可在root下保留该用户的变量
停止grid 高可用,所有节点都做
crsctl stop crs 停止服务
crsctl stop cluster -all
crsctl start crs 启动服务
crsctl start cluster -all
crsctl check cluster -all 集群检查
crsctl query css votedisk 列出投票磁盘
crsctl query crs softwareversion public2 查节点版本
crsctl get css 获取css参数值
crsctl delete css votedisk <path> - 删除一个投票磁盘
crs_stat -t 查看crs资源状态
crsctl stat res -t
三:oracle安装
1. 识别节点问题
INS-35423 安装 database 时安装程序无法获取集群节点(用vim改完后有可能打不开图形界面)
sed -i '/IDX/s/IDX="1"/IDX="1" CRS="true"/g' /u01/app/oraInventory/ContentsXML/inventory.xml
cat /u01/app/oraInventory/ContentsXML/inventory.xml
<?xml version="1.0" standalone="yes" ?>
<!-- Copyright (c) 1999, 2013, Oracle and/or its affiliates.
All rights reserved. -->
<!-- Do not modify the contents of this file by hand. -->
<INVENTORY>
<VERSION_INFO>
<SAVED_WITH>11.2.0.4.0</SAVED_WITH>
<MINIMUM_VER>2.1.0.6.0</MINIMUM_VER>
</VERSION_INFO>
<HOME_LIST>
<HOME NAME="Ora11g_gridinfrahome1" LOC="/u01/app/11.2.0.3/grid" TYPE="O" IDX="1" CRS="true">
<NODE_LIST>
<NODE NAME="public1"/>
<NODE NAME="public2"/>
</NODE_LIST>
</HOME>
</HOME_LIST>
<COMPOSITEHOME_LIST>
</COMPOSITEHOME_LIST>
</INVENTORY>
2. 安装oracle
各个节点执行文件
3. 增加磁盘组
在”grid”用户下执行:
asmca
4. 创建监听
su - grid
netca
5. 创建数据库
su - oracle
dbca
6. 创建表空间
select file_name from dba_data_files;
create tablespace xiang datafile '+WANG/uap/datafile/xiang.dbf' size 100m autoextend on;
create user wang identified by "123456" default tablespace xiang profile DEFAULT account unlock;
grant dba to wang;
grant unlimited tablespace to wang;
四:开关机顺序
1. 关闭数据库
srvctl stop database -d ORCL ---停止所有节点上的实例
(crsctl stop crs 为停止本节点整个集群软件,见后续步骤)
2. 停止共享存储asm
srvctl stop asm -n node_name
3. 停止集群
crsctl stop cluster -all
启动顺序
1. 启动集群
su - grid
su root
crsctl start crs (crsctl start cluster -all)
或者
crsctl start cluster -n rac1 rac2 --两个节点同时启动
2. 启动共享存储asm
srvctl start asm -n node_name
3. 启动数据库
srvctl start database -d ORCL
五:附录及问题处理
1. 集群各节点状态
[grid@public1 ~]$ crs_stat -t
Name Type Target State Host
------------------------------------------------------------
ora.DATA.dg ora....up.type ONLINE ONLINE public1
ora....ER.lsnr ora....er.type ONLINE ONLINE public1
ora....N1.lsnr ora....er.type ONLINE ONLINE public1
ora....N2.lsnr ora....er.type ONLINE ONLINE public2
ora....N3.lsnr ora....er.type ONLINE ONLINE public2
ora.WANG.dg ora....up.type ONLINE ONLINE public1
ora.asm ora.asm.type ONLINE ONLINE public1
ora.cvu ora.cvu.type ONLINE ONLINE public2
ora.gsd ora.gsd.type OFFLINE OFFLINE
ora....network ora....rk.type ONLINE ONLINE public1
ora.oc4j ora.oc4j.type ONLINE ONLINE public2
ora.ons ora.ons.type ONLINE ONLINE public1
ora....SM1.asm application ONLINE ONLINE public1
ora....C1.lsnr application ONLINE ONLINE public1
ora....ic1.gsd application OFFLINE OFFLINE
ora....ic1.ons application ONLINE ONLINE public1
ora....ic1.vip ora....t1.type ONLINE ONLINE public1
ora....SM2.asm application ONLINE ONLINE public2
ora....C2.lsnr application ONLINE ONLINE public2
ora....ic2.gsd application OFFLINE OFFLINE
ora....ic2.ons application ONLINE ONLINE public2
ora....ic2.vip ora....t1.type ONLINE ONLINE public2
ora....ry.acfs ora....fs.type ONLINE ONLINE public1
ora.scan1.vip ora....ip.type ONLINE ONLINE public1
ora.scan2.vip ora....ip.type ONLINE ONLINE public2
ora.scan3.vip ora....ip.type ONLINE ONLINE public2
2. ons服务无法online
crs_stat -t
ora.rac2.ons application ONLINE OFFLINE
onsctl ping
Failed to get IP for localhost (2)
ons is not running ...
onsctl debug
Failed to get IP for localhost (2)
处理方法一:
/etc/hosts发生变化,改回去重启ons
crs_start ora.rac1.ons
crs_start ora.rac2.ons
crs_stat -t
3. CRS-0184:解决(储存问题)
CRS-0184: Cannot communicate with the CRS daemon. 之存储故障解决办法
方式一:
crs_stat -t
CRS-0184: Cannot communicate with the CRS daemon.
crsctl check crs
CRS-4638: Oracle High Availability Services is online
CRS-4535: Cannot communicate with Cluster Ready Services
CRS-4529: Cluster Synchronization Services is online
CRS-4533: Event Manager is online
查日志
find / -name crsd.log
2014-12-09 12:02:57.112: [ CRSOCR][1] OCR context init failure. Error: PROC-26: Error while accessing the physical storage
2014-12-09 12:02:57.112: [ CRSD][1] Created alert : (:CRSD00111:): Could not init OCR, error: PROC-26: Error while accessing the physical storage
2014-12-09 12:02:57.112: [ CRSD][1][PANIC] CRSD exiting: Could not init OCR, code: 26
2014-12-09 12:02:57.112: [ CRSD][1] Done. 等
lsblk
解决
查看asm_disk是否正常
su - grid
$ sqlplus / as sysasm
SQL>col name format a15
SQL>col path format a17
SQL>select name,path,header_status,mount_status,state from v$asm_disk;
NAME PATH HEADER_STATU MOUNT_S STATE
--------------- ----------------------------- ------- --------
/dev/rhdisk6 MEMBER CLOSED NORMAL
/dev/rhdisk7 MEMBER CLOSED NORMAL
/dev/rhdisk8 MEMBER CLOSED NORMAL
DATA_0000 /dev/rhdisk3 MEMBER CACHED NORMAL
DATA_0001 /dev/rhdisk4 MEMBER CACHED NORMAL
DATA_0002 /dev/rhdisk5 MEMBER CACHED NORMAL
su - grid
$asmcmd
ASMCMD> lsdg 查看asm磁盘组
ASMCMD> mount -a 刷新
ASMCMD>lsdg
State Type Rebal Sector Block AU Total_MB Free_MB Req_mir_free_MB Usable_file_MB Offline_disks Voting_files Name
MOUNTED EXTERN N 512 4096 1048576 3145728 2058582 0 2058582 0 N DATA/
MOUNTED NORMAL N 512 4096 1048576 3072 2146 1024 561 0 Y OCR/
再次查看
su - grid
$ sqlplus / as sysasm
SQL>col name format a15
SQL>col path format a17
SQL> select name,path,header_status,mount_status,state from v$asm_disk;
DATA_0000 /dev/rhdisk3 MEMBER CACHED NORMAL
DATA_0001 /dev/rhdisk4 MEMBER CACHED NORMAL
DATA_0002 /dev/rhdisk5 MEMBER CACHED NORMAL
OCR_0000 /dev/rhdisk6 MEMBER CACHED NORMAL
OCR_0001 /dev/rhdisk7 MEMBER CACHED NORMAL
OCR_0002 /dev/rhdisk8 MEMBER CACHED NORMAL
方式二:(重新加入集群)
报错原因查找:
在节点1节点查看IP地址,VIP已经不存在
在节点2上运行“crs_stat -t”发现,节点1好像已经退出集群环境
su - grid
$ crs_stat -t
Name Type Target State Host
------------------------------------------------------------
ora.DATA.dg ora....up.type ONLINE ONLINE node2
ora....ER.lsnr ora....er.type ONLINE ONLINE node2
ora....N1.lsnr ora....er.type ONLINE OFFLINE
ora.OCR.dg ora....up.type ONLINE ONLINE node2
ora.asm ora.asm.type ONLINE ONLINE node2
ora.cvu ora.cvu.type ONLINE OFFLINE
ora.gsd ora.gsd.type OFFLINE OFFLINE
ora....network ora....rk.type ONLINE ONLINE node2
ora.node1.vip ora....t1.type ONLINE OFFLINE
ora....SM2.asm application ONLINE ONLINE node2
ora....E2.lsnr application ONLINE ONLINE node2
ora.node2.gsd application OFFLINE OFFLINE
ora.node2.ons application ONLINE ONLINE node2
ora.node2.vip ora....t1.type ONLINE ONLINE node2
ora.oc4j ora.oc4j.type ONLINE OFFLINE
ora.ons ora.ons.type ONLINE ONLINE node2
ora.prod.db ora....se.type ONLINE ONLINE node2
ora....ry.acfs ora....fs.type ONLINE ONLINE node2
ora.scan1.vip ora....ip.type ONLINE OFFLINE
从输出中看到节点1的VIP已经漂移到节点2,从而判断节点1已经脱离集群。
通过"ifconfig"查看节点2上的ip信息。
问题解决:
清理节点1的配置信息,打算重新运行root.sh
# /u01/app/11.2.0.3/grid/crs/install/rootcrs.pl -verbose -deconfig -force
# /u01/app/11.2.0.3/grid/crs/install/roothas.pl -verbose -deconfig -force
/u01/app/11.2.0.3/grid/root.sh
转载于:https://blog.51cto.com/13272050/1963276