一、 检查数据库基本状况
--1.1检查Oracle实例状态
/*注:若是 oracle rac (v$instance替换为gv$instance)
其中“STATUS”表示Oracle当前的实例状态,必须上为“OPEN”;
“DATABASE_STATUS”表示Oracle当前数据库的状态,必须为“ACTIVE”;*/
select instance_name, host_name, startup_time, status, database_status from gv$instance;
--1.2 检查Oracle在线日志状态
--输出结果应该有3条以上(包含3条)记录,“STATUS”应该为非“INVALID”,非“DELETED”。注:“STATUS”显示为空表示正常。
select group#, status, type, member from gv$logfile;
select * from gv$log;
SELECT TRUNC(first_time) "DATE_TIME",
--TO_CHAR(first_time, 'Dy') "Day",
COUNT(1) "COUNT",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '00', 1, 0)) "HH0",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '01', 1, 0)) "HH1",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '02', 1, 0)) "HH2",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '03', 1, 0)) "HH3",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '04', 1, 0)) "HH4",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '05', 1, 0)) "HH5",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '06', 1, 0)) "HH6",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '07', 1, 0)) "HH7",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '08', 1, 0)) "HH8",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '09', 1, 0)) "HH9",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '10', 1, 0)) "HH10",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '11', 1, 0)) "HH11",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '12', 1, 0)) "HH12",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '13', 1, 0)) "HH13",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '14', 1, 0)) "HH14",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '15', 1, 0)) "HH15",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '16', 1, 0)) "HH16",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '17', 1, 0)) "HH17",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '18', 1, 0)) "HH18",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '19', 1, 0)) "HH19",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '20', 1, 0)) "HH20",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '21', 1, 0)) "HH21",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '22', 1, 0)) "HH22",
SUM(DECODE(TO_CHAR(first_time, 'hh24'), '23', 1, 0)) "HH23"
-- ROUND(COUNT(1) / 24, 2) "Avg"
FROM gv$log_history
--WHERE thread# = inst_id AND inst_id = 1
WHERE first_time >= TO_DATE('2023/12/15','YYYY/MM/DD') AND first_time <= TO_DATE('2024/01/03','YYYY/MM/DD')
GROUP BY TRUNC(first_time), TO_CHAR(first_time, 'Dy')
ORDER BY 1
--实际归档日志大小
select a.RECID,
a.NAME,
a.DEST_ID,
a.THREAD#,
a.SEQUENCE#,
a.RESETLOGS_TIME,
a.FIRST_TIME,
a.BLOCKS * a.BLOCK_SIZE / 1024 / 1024 as MB
from v$archived_log a
WHERE FIRST_TIME between to_date('2024-03-07', 'yyyy-mm-dd') and
to_date('2024-03-09', 'yyyy-mm-dd')
and DEST_ID = 1
--1.3 检查Oracle表空间的状态
--输出结果中STATUS应该都为ONLINE。
select tablespace_name,status from dba_tablespaces;
--1.4 检查Oracle所有数据文件状态
--输出结果中“STATUS”应该都为“ONLINE”。或者:
select name, status from gv$datafile;
--输出结果中“STATUS”应该都为“AVAILABLE”;
select file_name, status from dba_data_files;
--1.5 检查无效对象
--如果有记录返回,则说明存在无效对象。若这些对象与应用相关,那么需要重新编译生成这个对象
select owner, object_type, object_name, status, created, last_ddl_time from dba_objects
where status!='VALID' and owner NOT IN ('SYS','SYSTEM','DBSNMP','XDB','GSMADMIN_INTERNAL','WMSYS','SYSMAN','OUTLN', 'EXFSYS', 'APEX_030200', 'ORDDATA', 'MDSYS', 'APPQOSSYS', 'ORDSYS', 'FLOWS_FILES', 'PUBLIC')
;
--或者:
SELECT owner, object_name, object_type,status FROM dba_objects WHERE status= 'INVALID';
--编译失效对象
SELECT 'ALTER ' || object_type || ' ' || owner || '.' || object_name || ' COMPILE;'
FROM dba_objects WHERE status = 'INVALID';
--检查表数据量
SELECT OWNER, TABLE_NAME, TABLESPACE_NAME, NUM_ROWS FROM dba_tables WHERE owner not in('SYS','SYSTEM','OUTLN','DBSNMP','APPQOSSYS',
'EXFSYS','CTXSYS','XDB','ORDSYS','MDSYS','OLAPSYS','ORDDATA','SYSMAN','APEX_030200')
AND temporary = 'N'
AND (owner, table_name) NOT IN (SELECT distinct TABLE_OWNER, TABLE_NAME FROM dba_tab_partitions)
ORDER BY num_rows DESC nulls LAST;
--1.6 检查所有回滚段状态
--输出结果中所有回滚段的“STATUS”应该为“ONLINE”。
select segment_name, status from dba_rollback_segs;
二、 检查Oracle相关资源的使用情况
包含:
a.检查Oracle初始化文件中相关的参数值
b.检查数据库连接情况,检查系统磁盘空间
c.检查Oracle各个表空间使用情况,检查一些扩展异常的对象,
d.检查system表空间内的内容,检查对象的下一扩展与表空间的最大扩展值,总共七个部分。
--2.1 检查Oracle初始化文件中相关参数值
--若LIMIT_VALU-MAX_UTILIZATION<=5,则表明与RESOURCE_NAME相关的Oracle初始化参数需要调整。可以通过修改Oracle初始化参数文件$ORACLE_BASE/admin/CKDB/pfile/initORCL.ora来修改。
select resource_name, max_utilization, initial_allocation, limit_value from gv$resource_limit;
--2.2 检查数据库连接情况
--查看当前会话连接数,是否属于正常范围。
/*其中:SID 会话(session)的ID号;
SERIAL# 会话的序列号,和SID一起用来唯一标识一个会话;
USERNAME 建立该会话的用户名;
PROGRAM 这个会话是用什么工具连接到数据库的;
STATUS 当前这个会话的状态,ACTIVE表示会话正在执行某些任务,INACTIVE表示当前会话没有执行任何操作;
如果建立了过多的连接,会消耗数据库的资源,同时,对一些“挂死”的连接可能需要手工进行清理。如果DBA要手工断开某个会话,则执行:
(一般不建议使用这种方式去杀掉数据库的连接,这样有时候session不会断开。容易引起死连接。建议通过sid查到操作系统的spid,使用ps –ef|grep spidno的方式确认spid不是ORACLE的后台进程。使用操作系统的kill -9命令杀掉连接)
alter system kill session 'SID,SERIAL#';
注意:上例中SID为1到10(USERNAME列为空)的会话,是Oracle的后台进程,不要对这些会话进行任何操作。*/
select count(*) from gv$session;
select sid, serial#, username, program, machine, status from gv$session;
--数据库自上次启动以来达到的最大值
select resource_name, max_utilization, LIMIT_VALUE from v$resource_limit where resource_name in ('processes', 'sessions');
--2.3 检查系统磁盘空间
--如果文件系统的剩余空间过小或增长较快,需对其进行确认并删除不用的文件以释放空间。
[oracle@ocdb1 ~]$ df -h
/*文件系统 容量 已用 可用 已用% 挂载点
devtmpfs 16G 0 16G 0% /dev
tmpfs 16G 441M 16G 3% /dev/shm
tmpfs 16G 82M 16G 1% /run
tmpfs 16G 0 16G 0% /sys/fs/cgroup
/dev/vda6 78G 8.9G 69G 12% /
/dev/vda2 120G 9.4G 111G 8% /u01
/dev/vda5 10G 33M 10G 1% /tmp
/dev/vda1 497M 239M 258M 49% /boot
tmpfs 1.2G 12K 1.2G 1% /run/user/42
tmpfs 1.2G 0 1.2G 0% /run/user/0*/
--2.4 检查表空间使用情况
select t.tablespace_name,
trunc((d.tbs_size - nvl(s.free_space, 0)) / 1024 / 1024) used_M,
trunc(d.tbs_size / 1024 / 1024) L_space,
trunc(d.tbs_maxsize / 1024 / 1024) P_space,
trunc(nvl(s.free_space, 0) / 1024 / 1024) L_free,
trunc((d.tbs_maxsize - d.tbs_size + nvl(s.free_space, 0)) / 1024 / 1024) P_free,
decode(d.tbs_maxsize,
0,
0,
trunc((d.tbs_size - nvl(s.free_space, 0)) * 100 /
d.tbs_maxsize)) P_usage
from (select SUM(bytes) tbs_size,
SUM(decode(sign(maxbytes - bytes), -1, bytes, maxbytes)) tbs_maxsize,
tablespace_name tablespace
from (select nvl(bytes, 0) bytes,
nvl(maxbytes, 0) maxbytes,
tablespace_name
from dba_data_files
union all
select nvl(bytes, 0) bytes,
nvl(maxbytes, 0) maxbytes,
tablespace_name
from dba_temp_files)
group by tablespace_name) d,
(select sum(bytes) free_space, tablespace
from (select bytes, tablespace_name tablespace
from dba_free_space
union all
select free_space bytes, tablespace_name tablespace
from dba_temp_free_space)
group by tablespace) s,
dba_tablespaces t
where t.tablespace_name = d.tablespace(+)
and t.tablespace_name = s.tablespace(+)
order by 7 desc;
--2.5 检查一些扩展异常的对象
--如果有记录返回,则这些对象的扩展已经快达到它定义时的最大扩展值。对于这些对象要修改它的存储结构参数。
select Segment_Name,
Segment_Type,
TableSpace_Name,
(Extents / Max_extents) * 100 Percent
From sys.DBA_Segments
Where Max_Extents != 0
and (Extents / Max_extents) * 100 >= 95
order By Percent;
--2.6 检查system表空间内的内容
--如果记录返回,则表明system表空间内存在一em些非syst和sys用户的对象。应该进一步检查这些对象是否与我们应用相关。
--如果相关请把这些对象移到非System表空间,同时应该检查这些对象属主的缺省表空间值。
select distinct (owner), table_name
from dba_tables
where tablespace_name = 'SYSTEM'
and owner != 'SYS'
and owner != 'SYSTEM'
union
select distinct (owner), index_name
from dba_indexes
where tablespace_name = 'SYSTEM'
and owner != 'SYS'
and owner != 'SYSTEM';
--2.7 检查对象的下一扩展与表空间的最大扩展值
--如果有记录返回,则表明这些对象的下一个扩展大于该对象所属表空间的最大扩展值,需调整相应表空间的存储参数。
select a.table_name, a.next_extent, a.tablespace_name
from all_tables a,
(select tablespace_name, max(bytes) as big_chunk
from dba_free_space
group by tablespace_name) f
where f.tablespace_name = a.tablespace_name
and a.next_extent > f.big_chunk
union
select a.index_name, a.next_extent, a.tablespace_name
from all_indexes a,
(select tablespace_name, max(bytes) as big_chunk
from dba_free_space
group by tablespace_name) f
where f.tablespace_name = a.tablespace_name
and a.next_extent > f.big_chunk;
三、检查Oracle数据库性能
--3.1 检查数据库的等待事件
--如果数据库长时间持续出现大量像latch free,enqueue,buffer busy waits,db file sequential read,db file scattered read等等待事件时,需要对其进行分析,可能存在问题的语句。
select sid, event, p1, p2, p3, WAIT_TIME, SECONDS_IN_WAIT
from gv$session_wait
where event not like 'SQL%'
and event not like 'rdbms%';
--3.2 Disk Read最高的SQL语句的获取
SELECT SQL_TEXT, DISK_READS
FROM (SELECT * FROM GV$SQLAREA ORDER BY DISK_READS)
WHERE ROWNUM <= 5;
--3.3 top10 sql
select *
from (select sa.SQL_TEXT,
sa.SQL_FULLTEXT,
sa.sql_id,
sa.EXECUTIONS, -- "执行次数"
round(sa.ELAPSED_TIME / 1000000, 2) ELAPSED_TIME, --"总执行时间"
round(sa.ELAPSED_TIME / 1000000 / sa.EXECUTIONS, 2) "Elapsed Time per Exec (s)", --"平均执行时间"
--sa.COMMAND_TYPE,
--sa.PARSING_USER_ID "用户ID",
u.username , --"用户名"
sa.MODULE
--sa.HASH_VALUE
from v$sqlarea sa
left join all_users u
on sa.PARSING_USER_ID = u.user_id
where sa.EXECUTIONS > 0
and username not in ('SYS','SYSTEM','ZABBIX')
order by (sa.ELAPSED_TIME / sa.EXECUTIONS) desc)
where rownum <= 20;
--3.4 等待时间最多的5个系统等待事件的获取
SELECT *
FROM (SELECT *
FROM GV$SYSTEM_EVENT
WHERE EVENT NOT LIKE 'SQL%'
ORDER BY TOTAL_WAITS DESC)
WHERE ROWNUM <= 5;
--3.5 检查运行很久的SQL
SELECT USERNAME,
SID,
OPNAME,
ROUND(SOFAR * 100 / TOTALWORK, 0) || '%' AS PROGRESS,
TIME_REMAINING,
SQL_TEXT
FROM GV$SESSION_LONGOPS, GV$SQL
WHERE TIME_REMAINING <> 0
AND SQL_ADDRESS = ADDRESS
AND SQL_HASH_VALUE = HASH_VALUE;
--3.6 检查消耗CPU最高的进程
SELECT P.PID PID,
S.SID SID,
P.SPID SPID,
S.USERNAME USERNAME,
S.OSUSER OSNAME,
P.SERIAL# S_#,
P.TERMINAL,
P.PROGRAM PROGRAM,
P.BACKGROUND,
S.STATUS,
RTRIM(SUBSTR(A.SQL_TEXT, 1, 80)) SQL
FROM GV$PROCESS P,
GV$SESSION S,
GV$SQLAREA A WHERE P.ADDR = S.PADDR AND S.SQL_ADDRESS = A.ADDRESS(+)
AND P.SPID LIKE '%&1%';
--3.7 检查碎片程度高的表
SELECT segment_name table_name, COUNT(*) extents
FROM dba_segments
WHERE owner NOT IN ('SYS', 'SYSTEM')
GROUP BY segment_name
HAVING COUNT(*) = (SELECT MAX(COUNT(*))
FROM dba_segments
GROUP BY segment_name);
--3.8 检查表空间的I/O比例
SELECT DF.TABLESPACE_NAME NAME,
DF.FILE_NAME "FILE",
F.PHYRDS PYR,
F.PHYBLKRD PBR,
F.PHYWRTS PYW,
F.PHYBLKWRT PBW
FROM GV$FILESTAT F, DBA_DATA_FILES DF
WHERE F.FILE# = DF.FILE_ID
ORDER BY DF.TABLESPACE_NAME;
--3.9 检查文件系统的I/O比例
SELECT SUBSTR(A.FILE#, 1, 2) "#",
SUBSTR(A.NAME, 1, 30) "NAME",
A.STATUS,
A.BYTES,
B.PHYRDS,
B.PHYWRTS
FROM GV$DATAFILE A, GV$FILESTAT B
WHERE A.FILE# = B.FILE#;
--3.10 检查死锁及处理
/*oracle级kill掉该session:
alter system kill session '&sid,&serial#';
操作系统级kill掉session:
#>kill -9 pid*/
select sid,
serial#,
username,
SCHEMANAME,
osuser,
MACHINE,
terminal,
PROGRAM,
owner,
object_name,
object_type,
o.object_id
from dba_objects o, gv$locked_object l, gv$session s
where o.object_id = l.object_id
and s.sid = l.session_id;
--4.5 查看是否有僵尸进程
--有些僵尸进程有阻塞其他业务的正常运行,定期杀掉僵尸进程。
select * from gv$process where addr not in (select paddr from gv$session);
--4.6 检查缓冲区命中率
--如果命中率低于90%则需加大数据库参数db_cache_size。
SELECT a.VALUE + b.VALUE logical_reads,
c.VALUE phys_reads,
round(100 * (1 - c.value / (a.value + b.value)), 4) hit_ratio
FROM gv$sysstat a, gv$sysstat b, gv$sysstat c
WHERE a.NAME = 'db block gets'
AND b.NAME = 'consistent gets'
AND c.NAME = 'physical reads';
--3.11 检查共享池命中率
--如低于95%,则需要调整应用程序使用绑定变量,或者调整数据库参数shared pool的大小。
select sum(pinhits) / sum(pins) * 100 from gv$librarycache;
--3.12 检查排序区
/*如果disk/(memoty+row)的比例过高,则需要调整
sort_area_size(workarea_size_policy=false)或
pga_aggregate_target(workarea_size_policy=true)。
*/
select name, value, 1793/(779452489+142681940135) from gv$sysstat where name like '%sort%';
--3.13 检查日志缓冲区
--如果redo buffer allocation retries/redo entries超过1%,则需要增大log_buffer
select name, value, 284420/698798527 from gv$sysstat where name in ('redo entries','redo buffer allocation retries');
四、检查数据库cpu、I/O、内存性能(Linux)
记录数据库的cpu使用、IO、内存等使用情况,使用vmstat,iostat,sar,top等命令进行信息收集并检查这些信息,判断资源使用情况。
--4.1 CPU使用情况
--注意上面的蓝色字体部分,此部分内容表示系统剩余的cpu,当其平均值下降至10%以下的时视为CPU使用率异常,需记录下该数值,并将状态记为异常。
[oracle@ocdb1 ~]$ top
--4.2 内存使用情况
--蓝色部分表示系统总内存,红色部分表示系统使用的内存,紫色部分表示系统剩余内存,当剩余内存低于总内存的10%时视为异常。
[oracle@ocdb1 ~]$ free -h
--4.3 系统I/O情况
--蓝色字体部分表示磁盘读写情况,红色字体部分为cpu IO等待情况。
[oracle@ocdb1 ~]$ iostat -k 1 3
--4.4 系统负载情况
--蓝体字部分表示系统负载,后面的3个数值如果有高于CPU核数*0.7的时候就表明系统在超负荷运转了,并将此值记录到巡检表,视为异常
[oracle@ocdb1 ~]$ uptime
五、其他检查
--5.1 监控数据量的增长情况
--5.2 检查失效的索引
/*
VAILD:有效
N/A:分区索引
UNUSABLE:无效
注:分区表上的索引status为N/A是正常的,如有失效索引则对该索引做rebuild,如:
alter index INDEX_NAME rebuild tablespace TABLESPACE_NAME;
*/
select owner, index_name, table_name, tablespace_name, status
From dba_indexes
Where status <> 'VALID'
and owner not in('SYS','AUDSYS','SYSTEM');
--5.3 检查不起作用的约束
/*
如有失效约束则启用,如:
alter Table TABLE_NAME Enable Constraints CONSTRAINT_NAME;
*/
SELECT owner, constraint_name, table_name, constraint_type, status
FROM dba_constraints
WHERE status = 'DISABLE'
and constraint_type = 'P';
--5.4 检查无效的trigger
/*
如有失效触发器则启用,如:
alter Trigger TRIGGER_NAME Enable;
*/
SELECT owner, trigger_name, table_name, status FROM dba_triggers WHERE status = 'DISABLED';
--5.5 检查表和索引的并行度
select owner, table_name, degree from dba_tables a
where degree <> 1;
select owner, table_name, index_name, degree from dba_indexes
where degree <> '1';
--取消表/索引并行
alter table tablename noparallel;
alter index idx_dave_id noparallel;
Reply:取消索引并行不锁表