https://blog.csdn.net/heizistudio/article/details/8638488
nrpe安装脚本
nagios-plugins-1.4.13.tar.gz
nrpe-2.8.tar.gz
源码包文件提前下载好
[root@dd]more install.sh
#!/bin/bash
tar -xvzf nagios-plugins-1.4.13.tar.gz
cd nagios-plugins-1.4.13
./configure
make
make install
groupadd nagios
useradd -g nagios -s /sbin/nologin nagios
echo !@NA#$gI%o^S | passwd nagios --stdin
cd ..
tar -xvzf nrpe-2.8.tar.gz
cd nrpe-2.8
./configure
make all
make install-plugin
make install-daemon
make install-daemon-config
echo "OK!"
[root@dd]
修改 nrpe文件
vim /usr/local/nagios/etc/nrpe.cfg
#nrpe_user=nagios
nrpe_user=oracle ---以oracle用户运行
allowed_hosts=127.0.0.1,192.169.169.60 --添加监控允许IP
修改check_oracle插件脚本
vim /usr/local/nagios/libexec/check_oracle
添加两行
#! /bin/sh
#
# latigid010@yahoo.com
# 01/06/2000
#
# This Nagios plugin was created to check Oracle status
#
ORACLE_HOME=/opt/oracle/product/10g
PATH=$ORACLE_HOME/bin:$PATH
vim +257 /usr/local/nagios/libexec/check_oracle
257 ON a.tablespace_name=b.tablespace_name WHERE a.tablespace_name=upper('${5}');---方便监控
修改 /usr/local/nagios/etc/nrpe.cfg 文件
。加入以下内容:
#Check Oracle
command[check_oracle_tns]=/usr/local/nagios/libexec/check_oracle –tns orcl
command[check_oracle_db]=/usr/local/nagios/libexec/check_oracle –db orcl
command[check_oracle_login]=/usr/local/nagios/libexec/check_oracle –login orcl c
ommand[check_oracle_cache]=/usr/local/nagios/libexec/check_oracle –cache orcl system lkjsdf 80 90
command[check_oracle_tablespace]=/usr/local/nagios/libexec/check_oracle –tablespace orcl system lkjsdf tab 90 80
具体参数写法请参考 check_oracle –help
二、 配置 nrpe 服务
修改 /usr/local/nagios/etc/nrpe.cfg 文件。加入以下内容:
#Check Oracle
command[check_oracle_tns]=/usr/local/nagios/libexec/check_oracle –tns orcl
command[check_oracle_db]=/usr/local/nagios/libexec/check_oracle –db orcl
command[check_oracle_login]=/usr/local/nagios/libexec/check_oracle –login orcl
command[check_oracle_cache]=/usr/local/nagios/libexec/check_oracle –cache orcl system lkjsdf 80 90
command[check_oracle_tablespace]=/usr/local/nagios/libexec/check_oracle –tablespace orcl system lkjsdf tab 90 80
具体参数写法请参考 check_oracle –help。
配置完成后,重启 xinetd 服务。
三、配置 Nagios 服务端
1.安装 nrpe 脚本支持。—参考官方文档。
2.添加 nrpe 命令配置。修改 nagios/etc/objects/command.cfg 文件:
define command {
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
3.在 nagios/etc/objects 添加oracle 主机配置文件。oracle.cfg 。
define host {
use linux-server
host_name oracle
alias Oracle 10g
address 192.168.100.63
}
define service {
use generic-service
host_name oracle
service_description TNS Check
check_command check_nrpe!check_oracle_tns
}
define service {
use generic-service
host_name oracle
service_description DB Check
check_command check_nrpe!check_oracle_db
}
define service {
use generic-service
host_name oracle
service_description Login Check
check_command check_nrpe!check_oracle_login
}
define service {
use generic-service
host_name oracle
service_description Cache Check
check_command check_nrpe!check_oracle_cache
}
define service {
use generic-service
host_name oracle
service_description Tablespace Check
check_command check_nrpe!check_oracle_tablespace
}
效果图:
NRPE 以及 Nagios-Plugin 安装文件见附件包内。NRPE 官方文档也包含在附件包呢。
备注:
图中 Tablespace check 还没有细调,用了错误的 table 名。
通过修改脚本还可以监视临时表空间。
CRITICAL - ORA-28009: connection as SYS should be as SYSDBA or SYSOPER
[root@dd]/usr/local/nagios/libexec/check_oracle --tablespace duxiu sys oracle USERS 90 80-------勿用sys用户
---------------------------------------------------
监控效果如下
Script to read Oracle's alert log | ||
The following script creates two tables:
read_alert and
read_alert_disk.
read_alert_disk is an
external table and contains the content of the
alert log.
read_alert will be empty after this script has been executed. It is used by the
update_alert_log script, shown further below.
define alert_length="2000" drop table alert_log; create table alert_log ( alert_date date, alert_text varchar2(&&alert_length) ) storage (initial 512k next 512K pctincrease 0); create index alert_log_idx on alert_log(alert_date) storage (initial 512k next 512K pctincrease 0); column db new_value _DB noprint; column bdump new_value _bdump noprint; select instance_name db from v$instance; select value bdump from v$parameter where name ='background_dump_dest'; drop directory BDUMP; create directory BDUMP as '&&_bdump'; drop table alert_log_disk; create table alert_log_disk ( text varchar2(&&alert_length) ) organization external ( type oracle_loader default directory BDUMP access parameters ( records delimited by newline nologfile nobadfile fields terminated by "&" ltrim ) location('alert_&&_DB..log') ) reject limit unlimited; update_alert_log.sql
Now, after the two tables are created, the
alert_log table can be filled with the following script. It only loads those records that are greater than the last time it loaded. And it loads the date/time on every line for convienance. It also helps when the alertlogs get rotated. You still keep the history within an Oracle table. Finally, it also strips out all the «crap» that is really not needed to see if you are looking for errors.
update_alert_log.sql
set serveroutput on declare isdate number := 0; start_updating number := 0; rows_inserted number := 0; alert_date date; max_date date; alert_text alert_log_disk.text%type; begin /* find a starting date */ select max(alert_date) into max_date from alert_log; if (max_date is null) then max_date := to_date('01-jan-1980', 'dd-mon-yyyy'); end if; for r in ( select substr(text,1,180) text from alert_log_disk where text not like '%offlining%' and text not like 'ARC_:%' and text not like '%LOG_ARCHIVE_DEST_1%' and text not like '%Thread 1 advanced to log sequence%' and text not like '%Current log#%seq#%mem#%' and text not like '%Undo Segment%lined%' and text not like '%alter tablespace%back%' and text not like '%Log actively being archived by another process%' and text not like '%alter database backup controlfile to trace%' and text not like '%Created Undo Segment%' and text not like '%started with pid%' and text not like '%ORA-12012%' and text not like '%ORA-06512%' and text not like '%ORA-000060:%' and text not like '%coalesce%' and text not like '%Beginning log switch checkpoint up to RBA%' and text not like '%Completed checkpoint up to RBA%' and text not like '%specifies an obsolete parameter%' and text not like '%BEGIN BACKUP%' and text not like '%END BACKUP%' ) loop isdate := 0; alert_text := null; select count(*) into isdate from dual where substr(r.text, 21) in ('2003','2004','2005','2006','2007') and r.text not like '%cycle_run_year%'; if (isdate = 1) then select to_date(substr(r.text, 5),'Mon dd hh24:mi:ss rrrr') into alert_date from dual; if (alert_date > max_date) then start_updating := 1; end if; else alert_text := r.text; end if; if (alert_text is not null) and (start_updating = 1) then insert into alert_log values (alert_date, substr(alert_text, 1, 180)); rows_inserted := rows_inserted + 1; commit; end if; end loop; sys.dbms_output.put_line('Inserting after date '||to_char(max_date, 'MM/DD/RR HH24:MI:SS')); sys.dbms_output.put_line('Rows Inserted: '||rows_inserted); commit; end; /
Let's execute the script:
SQL> @update_alert_log Inserting after date 01/01/80 00:00:00 Rows Inserted: 17361 PL/SQL procedure successfully completed.
The
alert_log table now contains the errors as recorded in the
alert.log file:
select alert_date, substr(alert_text,1, 69) from alert_log; 12-JUL-06 Error stack returned to user: 12-JUL-06 ORA-02049: timeout: distributed transaction waiting for lock 16-JUL-06 Errors in file /usr/local/opt/oracle/admin/P2BL36A/udump/p2bl36a_ora_ 18-JUL-06 ORACLE Instance P2BL36A - Can not allocate log, archival required 18-JUL-06 Thread 1 cannot allocate new log, sequence 265396 18-JUL-06 All online logs needed archiving 18-JUL-06 ORACLE Instance P2BL36A - Can not allocate log, archival required 18-JUL-06 Thread 1 cannot allocate new log, sequence 265397 18-JUL-06 All online logs needed archiving 18-JUL-06 ORACLE Instance P2BL36A - Can not allocate log, archival required 18-JUL-06 Thread 1 cannot allocate new log, sequence 265398 18-JUL-06 All online logs needed archiving 23-JUL-06 Errors in file /usr/local/opt/oracle/admin/P2BL36A/udump/p2bl36a_ora_ 25-JUL-06 Errors in file /usr/local/opt/oracle/admin/P2BL36A/bdump/p2bl36a_s001 25-JUL-06 ORA-07445: exception encountered: core dump [kpnrls()+148] [SIGBUS] [ 25-JUL-06 ORA-03113: end-of-file on communication channel 25-JUL-06 ORA-02063: preceding line from FRD01 25-JUL-06 found dead shared server 'S001', pid = (21, 1) 27-JUL-06 alter tablespace TBS01 27-JUL-06 add datafile '/ora106/ORACLE/P2BL36A/dbf/tbs01P2BL36A_32.dbf' size 20 27-JUL-06 ORA-1119 signalled during: alter tablespace TBS01 27-JUL-06 add datafile '/ora106/ORACL... 27-JUL-06 alter tablespace TBS01 27-JUL-06 add datafile '/ora106/ORACLE/P2BL36A/dbf/tbs01P2BL36A_32.dbf' size 20 27-JUL-06 Completed: alter tablespace TBS01 27-JUL-06 add datafile '/ora106/ORACL 28-JUL-06 ORA-01555 caused by SQL statement below (Query Duration=59968 sec, SC 28-JUL-06 SELECT /*+ ORDERED NO_EXPAND USE_NL(A2) INDEX(A2 "FC_REQUEST_HISTORY_ Thanks
The scripts above are provided by
Shawn M Ellinger.
Links
See also
this procedure to read the alert log.
|
#########DB 端
step 1:备份数据库日志,并且清空当前日志
cp alert_<sid>.log alert_<sid>.log_2018
cat > alert_<sid>.log
##end
step 2:
##以下命令都在dbmgr 用户下使用,初始化 alert.log
####建立外部表,alert_log_disk(读取alert_sid.log),和并将数据同步到内部表alert_log
##The following script creates two tables: read_alert and read_alert_disk.
##read_alert_disk is an external table and contains the content of the alert log.
##read_alert will be empty after this script has been executed. It is used by the update_alert_log script, shown further below.
define alert_length="2000"
drop table alert_log;
create table alert_log (
alert_date date,
alert_text varchar2(&&alert_length)
)
storage (initial 512k next 512K pctincrease 0);
create index alert_log_idx on alert_log(alert_date)
storage (initial 512k next 512K pctincrease 0);
column db new_value _DB noprint;
column bdump new_value _bdump noprint;
select instance_name db from v$instance;
select value bdump from v$parameter
where name ='background_dump_dest';
drop directory BDUMP;
create directory BDUMP as '&&_bdump';
drop table alert_log_disk;
create table alert_log_disk ( text varchar2(&&alert_length) )
organization external (
type oracle_loader
default directory BDUMP
access parameters (
records delimited by newline nologfile nobadfile
fields terminated by "&" ltrim
)
location('alert_&&_DB..log')
)
reject limit unlimited;
step 3:
###new procedure insert_alert_log ,然后使用job 每天同步一次数据,从alert_log_disk到alert_log
3.1 create procedure
create or replace procedure insert_alert_log
as
isdate number := 0;
start_updating number := 0;
rows_inserted number := 0;
alert_date date;
max_date date;
alert_text alert_log_disk.text%type;
begin
/* find a starting date */
select max(alert_date) into max_date from alert_log;
if (max_date is null) then
max_date := to_date('01-jan-1980', 'dd-mon-yyyy');
end if;
for r in (
select substr(text,1,180) text from alert_log_disk
where text not like '%offlining%'
and text not like 'ARC_:%'
and text not like '%LOG_ARCHIVE_DEST_1%'
and text not like '%Thread 1 advanced to log sequence%'
and text not like '%Current log#%seq#%mem#%'
and text not like '%Undo Segment%lined%'
and text not like '%alter tablespace%back%'
and text not like '%Log actively being archived by another process%'
and text not like '%alter database backup controlfile to trace%'
and text not like '%Created Undo Segment%'
and text not like '%started with pid%'
and text not like '%ORA-12012%'
and text not like '%ORA-06512%'
and text not like '%ORA-000060:%'
and text not like '%coalesce%'
and text not like '%Beginning log switch checkpoint up to RBA%'
and text not like '%Completed checkpoint up to RBA%'
and text not like '%specifies an obsolete parameter%'
and text not like '%BEGIN BACKUP%'
and text not like '%END BACKUP%'
and text not like '%Incremental checkpoint%%'
)
loop
isdate := 0;
alert_text := null;
select count(*) into isdate
from dual
where substr(r.text, 21) in ('2018','2019','2020','2021','2022','2023','2024','2025')
and r.text not like '%cycle_run_year%';
if (isdate = 1) then
select to_date(substr(r.text, 5),'Mon dd hh24:mi:ss rrrr')
into alert_date
from dual;
if (alert_date > max_date) then
start_updating := 1;
end if;
else
alert_text := r.text;
end if;
if (alert_text is not null) and (start_updating = 1) then
insert into alert_log values (alert_date, substr(alert_text, 1, 180));
rows_inserted := rows_inserted + 1;
commit;
end if;
end loop;
sys.dbms_output.put_line('Inserting after date '||to_char(max_date, 'MM/DD/RR HH24:MI:SS'));
sys.dbms_output.put_line('Rows Inserted: '||rows_inserted);
commit;
end;
/
3.2 test the procedure
set serveroutput on
begin
insert_alert_log;
end;
/
select count(*) from alert_log;
####end
step 4
###授权
grant select on dbmgr.alert_log to dbmonopr;
step 5
##create job every day 22 hour,同步一次
SQL>variable job1 number;
SQL>
begin
dbms_job.submit(:job1,'INSERT_ALERT_LOG;','TRUNC(SYSDATE) + 22/24,''TRUNC(SYSDATE) + 22/24 + 1');
end;
/
SQL>print :job1
SQL>
begin
dbms_job.run(:job1);
end;
/
SQL>
select count(*) from dbmgr.alert_log;
###end
#########nagios 端,需要使用nagios 配置,都在nagios 用户下执行
step 1: /usr/local/ nagios /libexec 下的check_oracle,请在check_oracle 文件加入如下几行记录
##add before line "--db)"
--alert_log)
get_result=`sqlplus -silent /nolog<<EOF
conn dbmonopr/dbmonoprkms11@$2
set heading off feedback off pagesize 0 verify off echo off
select alert_date||' '||substr(alert_text,1, 69) from dbmgr.alert_log where alert_date > sysdate-2 and substr(alert_text,1, 69) like '%ORA-%';
exit;
EOF`
#echo $get_result
result=`echo "$get_result"`
if [ -z "$result" ] ; then
echo "OK - ${2} Database alert_log is ok."
exit 0
else
echo "CRITICAL - ${2} Database alert_log report ($result)."
exit 2
fi
;;
测试命令是否可以执行 <SID>是数据库名字。
/usr/local/nagios/libexec/check_oracle --alert_log <SID>
/usr/local/nagios/libexec/check_oracle --db <SID>
step 2. vim /usr/local/nagios/etc/nrpe.cfg add below command
command[check_db_alert_status]=/usr/local/nagios/libexec/check_oracle --alert_log <SID>
aintaining Oracle Logs and Diagnostic Files
Prior to version 11g, Oracle databases stored most log and diagnostic information in the $ORACLE_BASE/admin directory. Now in 11g, there is a new directory at $ORACLE_BASE/diagwith a different organizational structure. In either version, it is important to understand what to find in these directories and how to maintain them.
The Admin Directory
In Oracle 10g and earlier, logs and diagnostic information can typically be found in the directory $ORACLE_BASE/admin, but sometimes this admin directory is found in $ORACLE_HOME or is set to another specific location chosen by the administrator. The admin directory typically contains a folder for each database on a system. Within this folder, there are the bdump, cdump and udump directories, among other things.
These directories are populated with diagnostic information by the database. Most of the contents are unique files created by database events, but some files like the alert log are written to on an ongoing basis and will continue to grow if left alone. In either case, these files should occasionally be cleaned up to prevent them from taking up too much disk space.
Checking the Alert Log
The check_alert_log.sh script shown below checks for any ORA- errors in the alert logof a given database. It will take a SID as an argument, or if no SID is specified, it will use the current one if set. In large environments, it may be more desirable to set up a GRID Controller and use that to monitor multiple databases. However, if only a handful of instances need to be monitored, it can be much easier to just set up monitoring scripts like this one.
In order to avoid reporting the same error twice, this script copies the contents out of the alert log, checks for errors and then saves them in a log file by the same name, but with .1 appended to it.
#!/bin/bash
# Add /usr/local/bin to the PATH variable so the oraenv command can be found
PATH=$PATH:/usr/local/bin; export PATH
# If a SID is provided as an argument it will be set and oraenv run
# otherwise we will use the current SID. If no SID is set or provided
# an error message is displayed and the script exits with a status of 1
if [ $1 ]
then
ORACLE_SID=$1
ORAENV_ASK=NO
. oraenv
else
if [ ! $ORACLE_SID ]
then
echo "Error: No ORACLE_SID set or provided as an argument"
exit 1
fi
fi
# Set the ORACLE_BASE variable
ORACLE_BASE=/u01/app/oracle; export ORACLE_BASE
cd $ORACLE_BASE/admin/$ORACLE_SID/bdump
# Copy the current alert log into a temporary file and empty the original
cp alert_$ORACLE_SID.log alert_$ORACLE_SID.log.temp
cp /dev/null alert_$ORACLE_SID.log
# Check the copy in the temporary file for ORA- errors
grep 'ORA-' alert_$ORACLE_SID.log.temp > /dev/null
# If found, email the Oracle user with the contents of the alert log
if [ $? = 0 ]
then
mail -s "$ORACLE_SID database alert log error" oracle < \
alert_$ORACLE_SID.log.temp
fi
# Move the contents of the temp file onto the permanent copy of the log
# and remove the temp file.
cat alert_$ORACLE_SID.log.temp >> alert_$ORACLE_SID.log.1
rm alert_$ORACLE_SID.log.temp
The block of code which starts with the first if statementis one that will be seen in many of the scripts in this book. If a SID is provided, the $1 variable exists and the oraenv command is executed. If not, the second if statement within the ELSE clause checks to see if a SID has already been set. If it has not, an error message is printed and the script exited with a result code of 1, indicating failure. If this script fails when executed from a crontab, the error message is automatically emailed to the crontab owner.
Since this will likely be run from a crontab entry, you typically provide a SID as an argument. The following crontab entry causes this script to be run every 15 minutes.
00,15,30,45 * * * * /u01/app/oracle/admin/common/check_alert_log.sh oss
####check_oracle from nagios
#! /bin/ksh
#
# latigid010@yahoo.com
# 01/06/2000
#
# This Nagios plugin was created to check Oracle status
#
PROGNAME=`basename $0`
PROGPATH=`echo $0 | sed -e 's,[\\/][^\\/][^\\/]*$,,'`
REVISION="1.4.15"
. $PROGPATH/utils.sh
print_usage() {
echo "Usage:"
echo " $PROGNAME --tns <Oracle Sid or Hostname/IP address>"
echo " $PROGNAME --db <ORACLE_SID>"
echo " $PROGNAME --status <ORACLE_SID>"
echo " $PROGNAME --asm <ORACLE_SID>"
echo " $PROGNAME --flash <ORACLE_SID>"
echo " $PROGNAME --login <ORACLE_SID>"
echo " $PROGNAME --cache <ORACLE_SID> <USER> <PASS> <CRITICAL> <WARNING>"
echo " $PROGNAME --ttbs <ORACLE_SID>"
echo " $PROGNAME --tablespace <ORACLE_SID> <USER> <PASS> <TABLESPACE> <CRITICAL> <WARNING>"
echo " $PROGNAME --oranames <Hostname>"
echo " $PROGNAME --help"
echo " $PROGNAME --version"
}
print_help() {
print_revision $PROGNAME $REVISION
echo ""
print_usage
echo ""
echo "Check Oracle status"
echo ""
echo "--tns SID/IP Address"
echo " Check remote TNS server"
echo "--db SID"
echo " Check local database (search /bin/ps for PMON process) and check"
echo " filesystem for sgadefORACLE_SID.dbf"
echo "--status SID"
echo "--asm SID"
echo " Check ASM"
echo "--login SID"
echo " Attempt a dummy login and alert if not ORA-01017: invalid username/password"
echo "--cache"
echo " Check local database for library and buffer cache hit ratios"
echo " ---> Requires Oracle user/password and SID specified."
echo " ---> Requires select on v_$sysstat and v_$librarycache"
echo "--tablespace"
echo " Check local database for tablespace capacity in ORACLE_SID"
echo " ---> Requires Oracle user/password specified."
echo " ---> Requires select on dba_data_files and dba_free_space"
echo "--oranames Hostname"
echo " Check remote Oracle Names server"
echo "--help"
echo " Print this help screen"
echo "--version"
echo " Print version and license information"
echo ""
echo "If the plugin doesn't work, check that the ORACLE_HOME environment"
echo "variable is set, that ORACLE_HOME/bin is in your PATH, and the"
echo "tnsnames.ora file is locatable and is properly configured."
echo ""
echo "When checking local database status your ORACLE_SID is case sensitive."
echo ""
echo "If you want to use a default Oracle home, add in your oratab file:"
echo "*:/opt/app/oracle/product/7.3.4:N"
echo ""
support
}
case "$1" in
1)
cmd='--tns'
;;
2)
cmd='--db'
;;
*)
cmd="$1"
;;
esac
# Information options
case "$cmd" in
--help)
print_help
exit $STATE_OK
;;
-h)
print_help
exit $STATE_OK
;;
--version)
print_revision $PROGNAME $REVISION
exit $STATE_OK
;;
-V)
print_revision $PROGNAME $REVISION
exit $STATE_OK
;;
esac
# Hunt down a reasonable ORACLE_HOME
if [ -z "$ORACLE_HOME" ] ; then
# Adjust to taste
for oratab in /var/opt/oracle/oratab /etc/oratab
do
[ ! -f $oratab ] && continue
ORACLE_HOME=`IFS=:
while read SID ORACLE_HOME junk;
do
if [ "$SID" = "$2" -o "$SID" = "*" ] ; then
echo $ORACLE_HOME;
exit;
fi;
done < $oratab`
[ -n "$ORACLE_HOME" ] && break
done
fi
# Last resort
[ -z "$ORACLE_HOME" -a -d $PROGPATH/oracle ] && ORACLE_HOME=$PROGPATH/oracle
if [ "$cmd" != "--db" ]; then
if [ -z "$ORACLE_HOME" -o ! -d "$ORACLE_HOME" ] ; then
echo "Cannot determine ORACLE_HOME for sid $2"
exit $STATE_UNKNOWN
fi
fi
PATH=$PATH:$ORACLE_HOME/bin
LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$ORACLE_HOME/lib
export ORACLE_HOME PATH LD_LIBRARY_PATH
case "$cmd" in
--tns)
tnschk=` tnsping $2`
tnschk2=` echo $tnschk | grep -c OK`
if [ ${tnschk2} -eq 1 ] ; then
tnschk3=` echo $tnschk | sed -e 's/.*(//' -e 's/).*//'`
echo "OK - reply time ${tnschk3} from $2"
exit $STATE_OK
else
echo "No TNS Listener on $2"
exit $STATE_CRITICAL
fi
;;
--oranames)
namesctl status $2 | awk '
/Server has been running for:/ {
msg = "OK: Up"
for (i = 6; i <= NF; i++) {
msg = msg " " $i
}
status = '$STATE_OK'
}
/error/ {
msg = "CRITICAL: " $0
status = '$STATE_CRITICAL'
}
END {
print msg
exit status
}'
;;
--db)
pmonchk=`ps -ef | grep -v grep | grep -c "ora_pmon_${2}$"`
if [ ${pmonchk} -ge 1 ] ; then
echo "${2} OK - ${pmonchk} PMON process(es) running"
exit $STATE_OK
#if [ -f $ORACLE_HOME/dbs/sga*${2}* ] ; then
#if [ ${pmonchk} -eq 1 ] ; then
#utime=`ls -la $ORACLE_HOME/dbs/sga*$2* | cut -c 43-55`
#echo "${2} OK - running since ${utime}"
#exit $STATE_OK
#fi
else
echo "${2} Database is DOWN"
exit $STATE_CRITICAL
fi
;;
--login)
loginchk=`sqlplus dummy/user@$2 < /dev/null`
loginchk2=` echo $loginchk | grep -c ORA-01017`
if [ ${loginchk2} -eq 1 ] ; then
echo "OK - dummy login connected"
exit $STATE_OK
else
loginchk3=` echo "$loginchk" | grep "ORA-" | head -1`
echo "CRITICAL - $loginchk3"
exit $STATE_CRITICAL
fi
;;
--status)
get_result=`sqlplus dbmonopr/dbmonoprkms11@$2<<EOF
spool off;
select 'status=' || count(0) from dual;
quit;
EOF`
#echo $get_result
status=`echo "$get_result"|grep status|cut -d "=" -f2`
#echo $status
if [ ${status} -eq 1 ] ; then
echo "OK - ${2} Database running"
exit 0
else
echo "CRITICAL - ${2} Database is DOWN"
exit 2
fi
;;
--flash)
get_result=`sqlplus dbmonopr/dbmonoprkms11@$2<<EOF
spool off;
select 'space_area=' || round(sum(percent_space_used)/1110,2) from v\\$flash_recovery_area_usage;
quit;
EOF`
#echo $get_result
space_area=`echo "$get_result"|grep space_area|cut -d "=" -f2`
#echo $space_area
if [ ${space_area} -gt 80 ] ; then
echo "CRITICAL - ${2} Database($space_area%) flash space used"
exit 2
fi
if [ ${space_area} -gt 60 ] ; then
echo "WARNING - ${2} Database($space_area%) flash space used"
exit 0
fi
if [ ${space_area} -lt 60 ] ; then
echo "OK - ${2} Database($space_area%) flash space used"
exit 0
fi
;;
--asm)
get_result=`sqlplus dbmonopr/dbmonoprkms11@$2<<EOF
spool off;
select 'dbinfo=' || name || '|' || total_mb || '|' || free_mb || '|' || round((free_mb/total_mb)*100,2) as use from v\\$asm_diskgroup;
quit;
EOF`
status=`echo "$get_result"|grep dbinfo`
#echo $status
archiveloginfo=`echo $status|cut -d " " -f1|cut -d "|" -f4`
coredatainfo=`echo $status|cut -d " " -f2|cut -d "|" -f4`
recoveryinfo=`echo $status|cut -d " " -f3|cut -d "|" -f4`
#echo $archiveloginfo
#echo $coredatainfo
#echo $recoveryinfo
flag=0
if [ ${archiveloginfo} -lt 30 ] ; then
flag=1
fi
if [ ${archiveloginfo} -lt 5 ] ; then
flag=2
fi
if [ ${coredatainfo} -lt 30 ] ; then
flag=1
fi
if [ ${coredatainfo} -lt 5 ] ; then
flag=2
fi
if [ ${recoveryinfo} -lt 30 ] ; then
flag=1
fi
if [ ${recoveryinfo} -lt 5 ] ; then
flag=2
fi
if [ ${flag} -eq 0 ] ; then
echo "OK - ${2} Database asm(free archivelog:${archiveloginfo}%,free core_data:${coredatainfo}%,free recovery:${recoveryinfo}%) is ok."
exit 0
fi
if [ ${flag} -eq 1 ] ; then
echo "WARNING - ${2} Database asm(free archivelog:${archiveloginfo}%,free core_data:${coredatainfo}%,free recovery:${recoveryinfo}%) less than 30%."
exit 1
fi
if [ ${flag} -eq 2 ] ; then
echo "CRITICAL - ${2} Database asm(free archivelog:${archiveloginfo}%,free core_data:${coredatainfo}%,free recovery:${recoveryinfo}%) less than 10%."
exit 2
fi
;;
--cache)
if [ ${5} -gt ${6} ] ; then
echo "UNKNOWN - Warning level is less then Crit"
exit $STATE_UNKNOWN
fi
result=`sqlplus -s ${3}/${4}@${2} << EOF
set pagesize 0
set numf '9999999.99'
select (1-(pr.value/(dbg.value+cg.value)))*100
from v\\$sysstat pr, v\\$sysstat dbg, v\\$sysstat cg
where pr.name='physical reads'
and dbg.name='db block gets'
and cg.name='consistent gets';
EOF`
if [ -n "`echo $result | grep ORA-`" ] ; then
error=` echo "$result" | grep "ORA-" | head -1`
echo "CRITICAL - $error"
exit $STATE_CRITICAL
fi
buf_hr=`echo "$result" | awk '/^[0-9\. \t]+$/ {print int($1)}'`
buf_hrx=`echo "$result" | awk '/^[0-9\. \t]+$/ {print $1}'`
result=`sqlplus -s ${3}/${4}@${2} << EOF
set pagesize 0
set numf '9999999.99'
select sum(lc.pins)/(sum(lc.pins)+sum(lc.reloads))*100
from v\\$librarycache lc;
EOF`
if [ -n "`echo $result | grep ORA-`" ] ; then
error=` echo "$result" | grep "ORA-" | head -1`
echo "CRITICAL - $error"
exit $STATE_CRITICAL
fi
lib_hr=`echo "$result" | awk '/^[0-9\. \t]+$/ {print int($1)}'`
lib_hrx=`echo "$result" | awk '/^[0-9\. \t]+$/ {print $1}'`
if [ $buf_hr -le ${5} -o $lib_hr -le ${5} ] ; then
echo "${2} CRITICAL - Cache Hit Rates: $lib_hrx% Lib -- $buf_hrx% Buff|lib=$lib_hrx%;${6};${5};0;100 buffer=$buf_hrx%;${6};${5};0;100"
exit $STATE_CRITICAL
fi
if [ $buf_hr -le ${6} -o $lib_hr -le ${6} ] ; then
echo "${2} WARNING - Cache Hit Rates: $lib_hrx% Lib -- $buf_hrx% Buff|lib=$lib_hrx%;${6};${5};0;100 buffer=$buf_hrx%;${6};${5};0;100"
exit $STATE_WARNING
fi
echo "${2} OK - Cache Hit Rates: $lib_hrx% Lib -- $buf_hrx% Buff|lib=$lib_hrx%;${6};${5};0;100 buffer=$buf_hrx%;${6};${5};0;100"
exit $STATE_OK
;;
--tbs)
get_result=`sqlplus -silent /nolog<<EOF
conn dbmonopr/dbmonoprkms11@$2
set heading off feedback off pagesize 0 verify off echo off
SELECT TRIM(DBMS_LOB.SUBSTR(WM_CONCAT(DATAVAL))) VALUE FROM (SELECT 'CNT=' || MAX(FLAG) AS DATAVAL FROM (SELECT '0' FLAG FROM (SELECT TABLESPACE_NAME, ROUND(SUM(BYTES) / 1024 / 1024, 2) SPACE, SUM(BLOCKS) BLOCKS FROM DBA_DATA_FILES GROUP BY TABLESPACE_NAME) D, (SELECT TABLESPACE_NAME, ROUND(SUM(BYTES) / 1024 / 1024, 2) FREE_SPACE FROM DBA_FREE_SPACE GROUP BY TABLESPACE_NAME) F WHERE D.TABLESPACE_NAME = F.TABLESPACE_NAME(+) AND D.TABLESPACE_NAME NOT IN ('UNDOTBS1', 'UNDOTBS2') AND ROUND((1 - NVL(FREE_SPACE, 0) / SPACE) * 100, 2) < 90 UNION ALL SELECT '1' FLAG FROM (SELECT TABLESPACE_NAME, ROUND(SUM(BYTES) / 1024 / 1024, 2) SPACE, SUM(BLOCKS) BLOCKS FROM DBA_DATA_FILES GROUP BY TABLESPACE_NAME) D, (SELECT TABLESPACE_NAME, ROUND(SUM(BYTES) / 1024 / 1024, 2) FREE_SPACE FROM DBA_FREE_SPACE GROUP BY TABLESPACE_NAME) F WHERE D.TABLESPACE_NAME = F.TABLESPACE_NAME(+) AND D.TABLESPACE_NAME NOT IN ('UNDOTBS1', 'UNDOTBS2') AND ROUND((1 - NVL(FREE_SPACE, 0) / SPACE) * 100, 2) >= 90 UNION ALL SELECT '2' FLAG FROM (SELECT TABLESPACE_NAME, ROUND(SUM(BYTES) / 1024 / 1024, 2) SPACE, SUM(BLOCKS) BLOCKS FROM DBA_DATA_FILES GROUP BY TABLESPACE_NAME) D, (SELECT TABLESPACE_NAME, ROUND(SUM(BYTES) / 1024 / 1024, 2) FREE_SPACE FROM DBA_FREE_SPACE GROUP BY TABLESPACE_NAME) F WHERE D.TABLESPACE_NAME = F.TABLESPACE_NAME(+) AND D.TABLESPACE_NAME NOT IN ('UNDOTBS1', 'UNDOTBS2') AND ROUND((1 - NVL(FREE_SPACE, 0) / SPACE) * 100, 2) >= 95) UNION ALL SELECT TRIM(DBMS_LOB.SUBSTR(WM_CONCAT(ABC))) DATAVAL FROM (SELECT 'NAME=' || TABLESPACE_NAME || ':SUM=' || SUM_SPACE || ':USED=' || USED_SPACE AS ABC FROM (SELECT D.TABLESPACE_NAME, SPACE SUM_SPACE, BLOCKS SUM_BLOCKS, SPACE - NVL(FREE_SPACE, 0) USED_SPACE FROM (SELECT TABLESPACE_NAME, ROUND(SUM(BYTES) / (1024 * 1024), 2) SPACE, SUM(BLOCKS) BLOCKS FROM DBA_DATA_FILES GROUP BY TABLESPACE_NAME) D, (SELECT TABLESPACE_NAME, ROUND(SUM(BYTES) / (1024 * 1024), 2) FREE_SPACE FROM DBA_FREE_SPACE GROUP BY TABLESPACE_NAME) F WHERE D.TABLESPACE_NAME = F.TABLESPACE_NAME(+)) A));
exit;
EOF`
#echo $get_result
result=`echo "$get_result"`
PERCNT=`echo $result|grep CNT|cut -d "," -f 1|cut -d "=" -f 2`
space_val=`echo $result|grep CNT|cut -d "," -f 2-`
if [ $PERCNT -eq 0 ] ; then
echo "OK - ${2} Database tablespace($space_val) is ok."
exit 0
fi
if [ $PERCNT -eq 1 ] ; then
echo "WARNING - ${2} Database tablespace($space_val) large than 90%."
exit 1
fi
if [ $PERCNT -eq 2 ] ; then
echo "CRITICAL - ${2} Database tablespace($space_val) large than 95%."
exit 2
fi
;;
--tablespace)
if [ ${6} -lt ${7} ] ; then
echo "UNKNOWN - Warning level is more then Crit"
exit $STATE_UNKNOWN
fi
result=`sqlplus -s ${3}/${4}@${2} << EOF
set pagesize 0
set numf '9999999.99'
select NVL(b.free,0.0),a.total,100 - trunc(NVL(b.free,0.0)/a.total * 1000) / 10 prc
from (
select tablespace_name,sum(bytes)/1024/1024 total
from dba_data_files group by tablespace_name) A
LEFT OUTER JOIN
( select tablespace_name,sum(bytes)/1024/1024 free
from dba_free_space group by tablespace_name) B
ON a.tablespace_name=b.tablespace_name WHERE a.tablespace_name='${5}';
EOF`
if [ -n "`echo $result | grep ORA-`" ] ; then
error=` echo "$result" | grep "ORA-" | head -1`
echo "CRITICAL - $error"
exit $STATE_CRITICAL
fi
ts_free=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print int($1)}'`
ts_total=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print int($2)}'`
ts_pct=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print int($3)}'`
ts_pctx=`echo "$result" | awk '/^[ 0-9\.\t ]+$/ {print $3}'`
if [ "$ts_free" -eq 0 -a "$ts_total" -eq 0 -a "$ts_pct" -eq 0 ] ; then
echo "No data returned by Oracle - tablespace $5 not found?"
exit $STATE_UNKNOWN
fi
if [ "$ts_pct" -ge ${6} ] ; then
echo "${2} : ${5} CRITICAL - $ts_pctx% used [ $ts_free / $ts_total MB available ]|${5}=$ts_pctx%;${7};${6};0;100"
exit $STATE_CRITICAL
fi
if [ "$ts_pct" -ge ${7} ] ; then
echo "${2} : ${5} WARNING - $ts_pctx% used [ $ts_free / $ts_total MB available ]|${5}=$ts_pctx%;${7};${6};0;100"
exit $STATE_WARNING
fi
echo "${2} : ${5} OK - $ts_pctx% used [ $ts_free / $ts_total MB available ]|${5}=$ts_pctx%;${7};${6};0;100"
exit $STATE_OK
;;
*)
print_usage
exit $STATE_UNKNOWN
esac