一、准备工作

1.  安装操作系统 Red Hat Enterprise Linux 6.5,略。

关闭防火墙,selinux,NetworkManager

# Flush all in-memory iptables rules, persist the (now empty) rule set,
# then stop the service and keep it off across reboots.
iptables -F

service iptables save

service iptables stop

chkconfig  iptables off

# NetworkManager interferes with Grid Infrastructure's management of the
# public/private/VIP interfaces — stop it and disable it at boot.
service NetworkManager stop

chkconfig  NetworkManager off

# Permanently disable SELinux (edit takes effect after the next reboot).
sed -i '/SELINUX/s/enforcing/disabled/g' /etc/selinux/config

2.  配置ip和主机名

序号

IP

用途

备注

1

172.25.254.120

public1


2

172.25.254.121

public1-vip1


3

10.10.10.1

private1


4

172.25.254.130

Public2


5

172.25.254.131

public2-vip2


6

10.10.10.2

private2


7

172.25.254.125

scanip


 

3.   /etc/hosts

vi /etc/hosts

172.25.254.120  public1

172.25.254.121  public1-vip1

10.10.10.1  private1

172.25.254.130  public2

172.25.254.131  public2-vip2

10.10.10.2  private2

172.25.254.125 scanip

 

cat /etc/sysconfig/network

NETWORKING=yes

HOSTNAME=public1

 

4.   yum搭建

ISO光盘内部文件拷贝到/var/www/html

cd /etc/yum.repos.d/

vim ab.repo

[a]

name=1

baseurl=http://172.25.254.120

enabled=1

gpgcheck=0

[b]

name=b

baseurl=http://172.25.254.120/ResilientStorage

enabled=1

gpgcheck=0

[c]

name=c

baseurl=http://172.25.254.120/HighAvailability

enabled=1

gpgcheck=0

[d]

name=d

baseurl=http://172.25.254.120/ScalableFileSystem

enabled=1

gpgcheck=0

[e]

name=e

baseurl=http://172.25.254.120/LoadBalancer

enabled=1

gpgcheck=0

5.  安装rpm

yum  -y update

reboot

yum -y install binutils* compat-libstdc* elfutils-libelf* gcc* glibc* libaio* libgcc* libstdc* make* sysstat* unixODBC*  pdksh-5.2.14-37.el5_8.1.x86_64.rpm    (pdksh 此包不在镜像内,需另行下载)

 

5. 修改内核参数

分别在两节点上执行:

vi /etc/sysctl.conf

末尾添加

# Kernel parameters required by Oracle 11gR2 (appended to /etc/sysctl.conf).
# NOTE(review): Oracle's install guide also lists kernel.shmall/kernel.shmmax;
# they are absent here — confirm the distribution defaults are sufficient.
kernel.shmmni = 4096

# Semaphores: semmsl semmns semopm semmni
kernel.sem = 250 32000 100 128

# Ephemeral port range used by Oracle client connections
net.ipv4.ip_local_port_range = 9000 65500

# Socket receive/send buffer defaults and maxima
net.core.rmem_default = 262144

net.core.rmem_max = 4194304

net.core.wmem_default = 262144

net.core.wmem_max = 1048576

# Async I/O and open-file limits
fs.aio-max-nr = 1048576

fs.file-max = 6815744

 

重新加载生效:sysctl -p

 

[root@db1 ~]#

6.  修改两节点的oracle用户限制

vi /etc/security/limits.conf 末尾添加:

# Shell resource limits for the oracle and grid users
# (appended to /etc/security/limits.conf).
# nofile = max open file descriptors, nproc = max processes,
# stack = stack size in KB; soft is the default, hard is the ceiling.
oracle    soft   nofile   2047

oracle    hard   nofile   65536

oracle    soft   nproc    2047

oracle    hard   nproc    16384

oracle    soft   stack    10240

oracle    hard   stack    32768

grid   soft   nofile   2047

grid   hard   nofile   65536

grid   soft   nproc    2047

grid   hard   nproc    16384

grid   soft   stack    10240

grid   hard   stack    32768

 

7.  修改两节点的/etc/pam.d/login

vi /etc/pam.d/login 末尾添加

session    required     pam_limits.so

session    required     /lib64/security/pam_limits.so

 

8.  时间服务器配置

不使用系统ntp同步服务,使用oracle自身的ctss同步时间

停止ntp服务

[root@db1 ~]# service ntpd stop

ntp配置文件重命名

[root@db1 ~]# mv /etc/ntp.conf /etc/ntp.conf.bak

如使用ntp服务,按如下配置:

[root@db1 ~]# yum install ntp

[root@db1 ~]# vi /etc/ntp.conf

添加一行server 21.12.145.44

 

[root@db1 ~]# vi /etc/sysconfig/ntpd

OPTIONS="-x -u ntp:ntp -p /var/run/ntpd.pid"

 

[root@db1 ~]# service ntpd restart

[root@db1 ~]# chkconfig ntpd on

9.  更改安装所有者的 ulimit 设置

两节点都要执行:

vi /etc/profile 末尾添加:

# Raise per-process limits for the oracle and grid installation owners
# (appended to /etc/profile, runs at login for every user).
# FIX: the original tested [ /$USER = "oracle" ] — the stray "/" makes the
# comparison "/oracle" vs "oracle", so the branch never matched. Same typo
# on the $SHELL test. Variables are also quoted/defaulted so the test does
# not break for users with an unset USER/SHELL.
if [ "${USER:-}" = "oracle" ] || [ "${USER:-}" = "grid" ]; then

    if [ "${SHELL:-}" = "/bin/ksh" ]; then

        # ksh spells the process limit differently — per Oracle's guide
        ulimit -p 16384

        ulimit -n 65536

    else

        ulimit -u 16384 -n 65536

    fi

    umask 022

fi

10. 两节点建立必要的组和用户

[root@db1 ~]#

groupadd -g 501 oinstall

groupadd -g 502 dba

groupadd -g 503 oper

groupadd -g 504 asmadmin

groupadd -g 505 asmdba

groupadd -g 506 asmoper

 

useradd -u 501 -g oinstall -G dba,oper,asmdba oracle

useradd -u 502 -g oinstall -G asmadmin,asmoper,asmdba grid

echo oracle |passwd --stdin  oracle

echo oracle |passwd --stdin  grid

 

密码都设置为oracle

11. 两节点建立安装目录

mkdir -p /u01/app/grid

mkdir -p /u01/app/11.2.0.3/grid

mkdir -p /u01/app/oracle

chown -R grid:oinstall /u01/app/grid

chown -R grid:oinstall /u01/app/11.2.0.3

chown -R oracle:oinstall /u01/app/oracle

chown grid:oinstall /u01/app

chmod -R 775 /u01

 

12. 两节点设置oraclegrid用户的环境变量

su - oracle

vi .bash_profile

# Environment for the oracle user (~oracle/.bash_profile additions).
# FIX: the original lines read "exportORACLE_SID=..." — with no space after
# "export" the shell looks for a command named "exportORACLE_SID=ats1" and
# sets nothing. A space is required after "export" on every line.
# NOTE: on node 2 use ORACLE_SID=ats2 (the "one says 1, one says 2" remark
# below this section in the original document).
export ORACLE_SID=ats1

export ORACLE_UNQNAME=ats

export ORACLE_BASE=/u01/app/oracle

export ORACLE_HOME=$ORACLE_BASE/product/11.2.0.3/db_1

export PATH=$ORACLE_HOME/bin:$PATH

export LD_LIBRARY_PATH=$ORACLE_HOME/lib:/lib:/usr/lib

export NLS_LANG=AMERICAN_AMERICA.UTF8

export TEMP=/tmp

export TMPDIR=/tmp

umask 022

 

 

绿色部分一个一,一个二

su - grid

vi .bash_profile

# Environment for the grid user (~grid/.bash_profile additions).
# FIX: same defect as the oracle profile — "exportORACLE_SID=..." has no
# space after "export", so the line fails with "command not found" and the
# variable is never set.
# NOTE: on node 2 use ORACLE_SID=+ASM2.
export ORACLE_SID=+ASM1

export ORACLE_BASE=/u01/app/grid

export ORACLE_HOME=/u01/app/11.2.0.3/grid

export PATH=$ORACLE_HOME/bin:$PATH:/usr/local/bin/:.

export LD_LIBRARY_PATH=$ORACLE_HOME/lib:/lib:/usr/lib

export NLS_LANG=AMERICAN_AMERICA.UTF8

export TEMP=/tmp

export TMP=/tmp

export TMPDIR=/tmp

umask 022

 

13.  asm磁盘有两种方式

此处只做udev方式

将磁盘划分为几个大小相同的分区 sdb1 sdb2 sdb3(做仲裁卷组),以及几个空间较大的分区 sdb5 sdb6(做数据存储)

vim /etc/udev/rules.d/60-raw.rules

# /etc/udev/rules.d/60-raw.rules — bind each ASM partition to a raw device
# when the kernel adds it. sdb1-sdb3 are the small OCR/voting partitions,
# sdb5-sdb6 the larger data partitions (per the text above this block).
ACTION=="add", KERNEL=="sdb1", RUN+="/bin/raw /dev/raw/raw1 %N"

ACTION=="add", KERNEL=="sdb2", RUN+="/bin/raw /dev/raw/raw2 %N"

ACTION=="add", KERNEL=="sdb3", RUN+="/bin/raw /dev/raw/raw3 %N"

ACTION=="add", KERNEL=="sdb5", RUN+="/bin/raw /dev/raw/raw5 %N"

ACTION=="add", KERNEL=="sdb6", RUN+="/bin/raw /dev/raw/raw6 %N"

# Grid/ASM must own the raw devices or ASM cannot open them.
# NOTE(review): the pattern raw[1-6] also matches raw4, which is never
# created above — harmless, but confirm it is intentional.
KERNEL=="raw[1-6]",OWNER="grid",GROUP="asmadmin",MODE="0660"

 

start_udev

 

cd /lib64

ln -s libcap.so.2.16  libcap.so.1

二、安装Grid Infrastructure

unzip p13390677_112040_Linux-x86-64_3of7.zip

unzip p13390677_112040_Linux-x86-64_1of7.zip

unzip p13390677_112040_Linux-x86-64_2of7.zip

1. 上传Grid Infrastructureoracle11g安装文件

在解压后的./grid/rpm目录下找到cvuqdisk-*.rpm,执行如下

[root@db1 ~]# CVUQDISK_GRP=oinstall; export CVUQDISK_GRP

[root@db1 ~]#  rpm -ivh cvuqdisk-1.0.9-1.rpm

2. 授予以上两个安装程序可执行权限

chown -R grid:oinstall ./grid

chown -R oracle:oinstall ./database

chmod -R 775 ./

 

3. 安装Grid Infrastructure

 [grid@rac1 grid]$ ./runInstaller

wKioL1mv-wzAQMejAACVC4xOdvE522.png-wh_50

wKioL1mv_DbAFmT7AAB0DtRrJX0931.png-wh_50

wKiom1mv_FXwf_KWAACRm5DPh2s986.png-wh_50

wKioL1mv_DejXjFRAACeaLNEERM438.png-wh_50

wKioL1mv_Dfx-nAkAACQ_qFo4Xc290.png-wh_50

wKiom1mv_FazMt8EAACx6fIa_4k303.png-wh_50

wKioL1mv_DfxjrVGAAC38j0Bo5o630.png-wh_50

wKiom1mv_FfQBILoAACsb3DFzZc856.png-wh_50

wKioL1mv_DjATPO_AACe-UkW52o949.png-wh_50

wKiom1mv_FfhlfwEAACxMt1IeW8378.png-wh_50

wKioL1mv_DjSHe_rAAC3qi3KZgc418.png-wh_50

wKiom1mv_FjwZHBhAACk-fM8ASw389.png-wh_50

wKioL1mv_DmRNksrAADCWarNLKQ029.png-wh_50

wKiom1mv_FjxsJf2AAC5lttHMI0468.png-wh_50

wKiom1mv_FmTWc1oAAD1b5jcg04060.png-wh_50

wKioL1mv_DrQFmkuAAC0qqmCFCE210.png-wh_50

wKioL1mv_DqRu61IAAAMNV9mSPU189.png-wh_50

脚本两个节点都做,而且第一个都执行完,再执行第二个

4. 部分集群命令

root调用普通用户下的变量方法

su - user        su  root   即可

 

停止grid  高可用,所有节点都做

crsctl stop crs                  # 停止服务

crsctl stop cluster -all

crsctl start crs                 # 启动服务

crsctl start cluster -all

crsctl check cluster -all        # 集群检查

crsctl query css votedisk        # 列出投票磁盘

crsctl query crs softwareversion public2    # 查节点版本

crsctl get css <parameter>       # 获取css参数值

crsctl delete css votedisk <path>    # 删除一个投票磁盘

crs_stat -t                      # 查看crs资源状态

crsctl stat res -t

 

三:oracle安装

1. 识别节点问题

INS-35423 安装 database 时安装程序无法获取集群节点(vim改完有可能打不图形)

sed -i '/IDX/s/IDX="1"/IDX="1" CRS="true"/g'  /u01/app/oraInventory/ContentsXML/inventory.xml

 

cat /u01/app/oraInventory/ContentsXML/inventory.xml

<?xml version="1.0" standalone="yes" ?>

<!-- Copyright (c) 1999, 2013, Oracle and/or its affiliates.

All rights reserved. -->

<!-- Do not modify the contents of this file by hand. -->

<INVENTORY>

<VERSION_INFO>

   <SAVED_WITH>11.2.0.4.0</SAVED_WITH>

   <MINIMUM_VER>2.1.0.6.0</MINIMUM_VER>

</VERSION_INFO>

<HOME_LIST>

<HOME NAME="Ora11g_gridinfrahome1" LOC="/u01/app/11.2.0.3/grid" TYPE="O" IDX="1" CRS="true">

   <NODE_LIST>

      <NODE NAME="public1"/>

      <NODE NAME="public2"/>

   </NODE_LIST>

</HOME>

</HOME_LIST>

<COMPOSITEHOME_LIST>

</COMPOSITEHOME_LIST>

</INVENTORY>

 

2.  安装oracle

wKiom1mv_UXxpAp6AACRalFCwJA354.png-wh_50

wKioL1mv_SbxflFuAACWQD0X39s653.png-wh_50

wKiom1mv_UbDzmc9AACPa6cAh-s846.png-wh_50

wKiom1mv_UbB8DvmAACmX5j-sk8790.png-wh_50

wKioL1mv_SeQxWo1AACiblxa7qI325.png-wh_50

wKiom1mv_UaQS2GpAACwMyLtaL8802.png-wh_50

wKioL1mv_SiRhDG-AACopsUmZew380.png-wh_50

wKioL1mv_Sjif8_BAACkL3u6oBI854.png-wh_50

wKiom1mv_Uew5X1rAACn3NU6uvk730.png-wh_50

wKioL1mv_Sih23U9AACY7gdFl6E995.png-wh_50

wKiom1mv_UjBj9flAADwTcwWluY209.png-wh_50

各个节点执行文件

 

3.  增加磁盘组

grid用户下执行:

asmca

wKioL1mv_Y7h-HWbAAD4D_FqF98550.png-wh_50

4.  创建监听

su - grid

netca

wKiom1mv_d_T_3ziAADjE56p5yA897.png-wh_50


wKiom1mv_paCiY3uAADkUzNNOKU437.png-wh_50

wKioL1mv_neB5iC8AADMvECeY3c899.png-wh_50

wKiom1mv_peDRObxAADcqyocoAA782.png-wh_50

wKiom1mv_pezcVhNAADSajNaPNM162.png-wh_50

wKioL1mv_nigB9FTAADIW8Hmmw0319.png-wh_50

wKioL1mv_niR7ZcnAADvRplLr6g913.png-wh_50

5.  创建数据库

 

su - oracle

dbca

wKioL1mv_znjSTwIAADQlBVBq_w800.png-wh_50

wKiom1mv_1iReJhGAAEE21XVHjU112.png-wh_50

wKiom1mv_1jAWycHAADKRMvaWqs801.png-wh_50

wKioL1mv_zqwHUXCAADPMfNwFzQ359.png-wh_50

wKioL1mv_zqDrCZwAACHs2foPxY192.png-wh_50

wKiom1mv_1nTZndsAADuTJXuVaA237.png-wh_50

wKioL1mv_zriRS1EAAAjAO3oKV0496.png-wh_50

wKiom1mv_1ryGH07AADb3nlT-UY666.png-wh_50

wKioL1mv_zvA9qM4AADcLCXctis404.png-wh_50

wKioL1mv_zuiZRPnAADIwo0NzmE055.png-wh_50

wKiom1mv_1rQ8RvUAADyd--kgYM134.png-wh_50

wKioL1mv_zyTp6tRAABwTk1l3mM808.png-wh_50

wKioL1mv_zzAjQg6AADWX7HKNHQ181.png-wh_50

wKiom1mv_1vhQSHlAABK0pSzVl0859.png-wh_50

6.  创建表空间


select file_name from dba_data_files;

 

create tablespace  xiang  datafile '+WANG/uap/datafile/xiang.dbf' size 100m autoextend on;

 create user wang identified by "123456" default tablespace xiang profile DEFAULT account unlock;

grant dba to wang;

 

grant unlimited tablespace to wang;

四:开关机顺序

 

1.  关闭数据库

crsctl stop crs       srvctl stop database -d ORCL ---停止所有节点上的实例

2.  停止共享存储asm

srvctl stop   asm -n node_name

 

3.  停止集群

crsctl stop cluster -all

 

启动顺序

1.  启动集群

su - grid   

su root

crsctl start crs  (crsctl start cluster -all)

或者

crsctl start cluster -n rac1 rac2  --两个节点同时启动

2.  启动共享存储asm

srvctl start asm -n node_name

3.  启动数据库

srvctl start database -d ORCL

 

 

五:附录及问题处理

1.  集群各节点状态

[grid@public1 ~]$ crs_stat -t

Name           Type           Target    State     Host        

------------------------------------------------------------

ora.DATA.dg    ora....up.type ONLINE    ONLINE    public1     

ora....ER.lsnr ora....er.type ONLINE    ONLINE    public1     

ora....N1.lsnr ora....er.type ONLINE    ONLINE    public1     

ora....N2.lsnr ora....er.type ONLINE    ONLINE    public2     

ora....N3.lsnr ora....er.type ONLINE    ONLINE    public2     

ora.WANG.dg    ora....up.type ONLINE    ONLINE    public1     

ora.asm        ora.asm.type   ONLINE    ONLINE    public1     

ora.cvu        ora.cvu.type   ONLINE    ONLINE    public2     

ora.gsd        ora.gsd.type   OFFLINE   OFFLINE               

ora....network ora....rk.type ONLINE    ONLINE    public1     

ora.oc4j       ora.oc4j.type  ONLINE    ONLINE    public2     

ora.ons        ora.ons.type   ONLINE    ONLINE    public1     

ora....SM1.asm application    ONLINE    ONLINE    public1     

ora....C1.lsnr application    ONLINE    ONLINE    public1     

ora....ic1.gsd application    OFFLINE   OFFLINE               

ora....ic1.ons application    ONLINE    ONLINE    public1     

ora....ic1.vip ora....t1.type ONLINE    ONLINE    public1     

ora....SM2.asm application    ONLINE    ONLINE    public2     

ora....C2.lsnr application    ONLINE    ONLINE    public2     

ora....ic2.gsd application    OFFLINE   OFFLINE               

ora....ic2.ons application    ONLINE    ONLINE    public2     

ora....ic2.vip ora....t1.type ONLINE    ONLINE    public2     

ora....ry.acfs ora....fs.type ONLINE    ONLINE    public1     

ora.scan1.vip  ora....ip.type ONLINE    ONLINE    public1     

ora.scan2.vip  ora....ip.type ONLINE    ONLINE    public2     

ora.scan3.vip  ora....ip.type ONLINE    ONLINE    public2

2.  ons服务无法online

crs_stat -t

ora.rac2.ons   application    ONLINE    OFFLINE

 

onsctl ping

Failed to get IP for localhost (2)

ons is not running ...

 

onsctl debug

Failed to get IP for localhost (2)

处理方法一:

/etc/hosts发生变化,改回去重启ons

crs_start ora.rac1.ons

crs_start ora.rac2.ons

crs_stat -t

 

3. CRS-0184:解决(储存问题)

CRS-0184:CannotcommunicatewiththeCRSdaemon.之存储故障解决办法

方式一:

crs_stat -t

CRS-0184: Cannot communicate with the CRSdaemon.

 

crsctl check crs

CRS-4638: Oracle High Availability Servicesis online

CRS-4535: Cannot communicate with ClusterReady Services

CRS-4529: Cluster Synchronization Servicesis online

CRS-4533: Event Manager is online

 

查日志

find / -name crsd.log

2014-12-09 12:02:57.112: [ CRSOCR][1] OCR context init failure. Error: PROC-26: Error while accessing thephysical storage

 

2014-12-09 12:02:57.112: [ CRSD][1] Created alert : (:CRSD00111:): Could not init OCR, error: PROC-26:Error while accessing the physical storage

2014-12-09 12:02:57.112: [ CRSD][1][PANIC] CRSD exiting: Could notinit OCR, code: 26

2014-12-09 12:02:57.112: [ CRSD][1] Done.

 

lsblk

解决

查看asm_disk是否正常

su - grid

 

$ sqlplus / as sysasm

 

SQL>col name format a15

 

SQL>col path format a17

 

SQL>select name,path,header_status,mount_status,state from v$asm_disk;

 

NAME PATH HEADER_STATU MOUNT_S STATE

 

--------------- ----------------------------- ------- --------

 

/dev/rhdisk6 MEMBER CLOSED NORMAL

/dev/rhdisk7 MEMBER CLOSED NORMAL

/dev/rhdisk8 MEMBER CLOSED NORMAL

DATA_0000 /dev/rhdisk3 MEMBER CACHED NORMAL

DATA_0001 /dev/rhdisk4 MEMBER CACHED NORMAL

DATA_0002 /dev/rhdisk5 MEMBER CACHED NORMAL

 

su grid

 

$asmcmd

ASMCMD>lsdg查看asm磁盘组

ASMCMD>mount -a刷新

 

ASMCMD>lsdg

 

State Type Rebal Sector Block AU Total_MB Free_MB Req_mir_free_MB Usable_file_MB Offline_disks Voting_files Name

 

MOUNTED EXTERN N 512 4096 1048576 3145728 2058582 0 2058582 0 N DATA/

 

MOUNTED NORMAL N 512 4096 1048576 3072 2146 1024 561 0 Y OCR/

 

再次查看

su - grid

 

$ sqlplus / as sysasm

 

SQL>col name format a15

 

SQL>col path format a17

 

SQL> selectname,path,header_status,mount_status,state from v$asm_disk;

 

DATA_0000 /dev/rhdisk3 MEMBER CACHED NORMAL

 

DATA_0001 /dev/rhdisk4 MEMBER CACHED NORMAL

 

DATA_0002 /dev/rhdisk5 MEMBER CACHED NORMAL

 

OCR_0000 /dev/rhdisk6 MEMBER CACHED NORMAL

 

OCR_0001 /dev/rhdisk7 MEMBER CACHED NORMAL

 

OCR_0002 /dev/rhdisk8 MEMBER CACHED NORMAL

方式二:(重新加入集群)

报错原因查找:

在节点1节点查看IP地址,VIP已经不存在

在节点2上运行“crs_stat -t”发现,节点1好像已经退出集群环境

 su - grid

$ crs_stat -t

Name          Type          Target    State    Host        

------------------------------------------------------------

ora.DATA.dg    ora....up.type        ONLINE    ONLINE    node2      

ora....ER.lsnr  ora....er.type        ONLINE    ONLINE    node2      

ora....N1.lsnr  ora....er.type        ONLINE    OFFLINE              

ora.OCR.dg    ora....up.type        ONLINE    ONLINE    node2      

ora.asm      ora.asm.type          ONLINE    ONLINE    node2      

ora.cvu      ora.cvu.type          ONLINE    OFFLINE              

ora.gsd      ora.gsd.type          OFFLINE  OFFLINE              

ora....network  ora....rk.type        ONLINE    ONLINE    node2      

ora.node1.vip  ora....t1.type        ONLINE    OFFLINE              

ora....SM2.asm  application          ONLINE    ONLINE    node2      

ora....E2.lsnr  application          ONLINE    ONLINE    node2      

ora.node2.gsd  application          OFFLINE  OFFLINE              

ora.node2.ons  application          ONLINE    ONLINE    node2      

ora.node2.vip  ora....t1.type        ONLINE    ONLINE    node2      

ora.oc4j      ora.oc4j.type        ONLINE    OFFLINE              

ora.ons      ora.ons.type          ONLINE    ONLINE    node2      

ora.prod.db    ora....se.type        ONLINE    ONLINE    node2      

ora....ry.acfs  ora....fs.type        ONLINE    ONLINE    node2      

ora.scan1.vip  ora....ip.type        ONLINE    OFFLINE              

从输出中看到节点1VIP已经飘逸到节点2,从而判断节点1已经脱离集群。

通过"ifconfig"查看节点2上的ip信息。

 

问题解决:

清理节点1的配置信息,打算重新运行root.sh

# /u01/app/11.2.0/grid/crs/install/rootcrs.pl -verbose -deconfig -force

 

# /u01/app/11.2.0/grid/crs/install/roothas.pl -verbose -deconfig -force

# /u01/app/11.2.0/grid/root.sh