初步完成下前几天给自己布置了一个作业,
对oracle一些关键指标进行分析,后续通过python编程开发后加入维护系统,以供分析使用。
这两天先弄了个版本,学习我社会哥,话不多说,搞之。
程序功能:通过python程序对oracle awr文件进行读取,当然这个是v1.0版本,这个版本后面还可以继续优化,可作为巡检或分析或定期监控:。
执行情况: python ora_awr_analysis.py
代码如下:
#!/usr/bin/python
# -*-coding:gbk -*-
# created by yujianfeng
# date @2018-10-3
# version:1.0
# 获取oracle的AWR报表中的数据库指标
import os
from bs4 import BeautifulSoup as BS
import pandas as pd
import datetime
'''
get_tr方法得到HTML文件中全部table中的每一行tr,并存储在长度为len(table)列表中
'''
def get_tr(file_path):
f = open(file_path, encoding="gbk")
count = f.readlines()
f.close()
str = count[0:count.index('Main Report\n')]
html = ''
for i in str:
html = html + i
soup = BS(html,'html.parser')
t = soup.find_all("table")
tr = []
for i in t:
tr.append(i.find_all("tr"))
#print (tr)
return tr
'''
parse_table方法传入参数tr_n,即为每一个table生成的tr列表
'''
def parse_table(tr_n):
th = []
td = []
for i in range(len(tr_n)):
if i == 0 and tr_n[i].find("th") != None:
th = tr_n[i].find_all("th")
else:
td.append(tr_n[i].find_all("td"))
attribute = []
for i in range(len(th)):
if th[i].contents == []:
attribute.append('')
else:
attribute.append(th[i].contents[0].strip())
value = [[] for i in range(len(td))]
for i in range(len(td)):
for j in range(len(td[i])):
value[i].append(td[i][j].contents[0].strip())
return (attribute, value)
'''
有列标题的表格最后生成DataFrame
'''
def df_create(t):
index = []
for i in t[1]:
index.append(i[0])
col = [[] for i in range(len(t[0]) - 1)]
for i in range(1, len(t[0])):
for j in range(len(t[1])):
col[i - 1].append(t[1][j][i])
s = [[] for i in range(len(col))]
for i in range(len(col)):
s[i] = pd.Series(col[i], index=index, name=t[0][i + 1])
df = pd.DataFrame(s).T
return df
'''
针对第5个表格(无标题)但出现索引叠加情况,特对此进行处理 例如说[1,2,3,4] >> [[1,2],[3,4]]
'''
def list2split(list):
l1 = []
l2 = []
for i in range(len(list)):
if i == 0 or i == 1:
l1.append(list[i])
else:
l2.append(list[i])
return [l1, l2]
'''
没有标题的表格生成Series
'''
def series_create(t):
if len(t[1][0]) == 4:
temp = []
for i in t[1]:
temp.append(list2split(i))
result = []
for i in temp:
for j in i:
result.append(j)
else:
result = t[1]
index = []
for i in result:
index.append(i[0])
col = []
for i in result:
col.append(i[-1])
s = pd.Series(col, index=index)
return s
tr_total = []
#file_path = 'D:/awr_1_82_83.html'
file_path = 'D:/study/python study/python_practice/python3_code/oracle_analysis/AWR_Report_Parse-master/file'
file_names = os.listdir(file_path)
for i in range(len(file_names)):
tr_total.append(get_tr(file_path+"/"+file_names[i]))
#print(tr_total)
print("第%d篇文档处理完毕"%(i+1)) # 全部表格的每一行(数据结构为[[table1],[table2],..[table n]],table1中为[tr1,tr2....])
table_total = [[] for i in range(len(tr_total))] # 创建table列表,table中每一元素为(attribute,value)的元组
for i in range(len(tr_total)):
for j in range(len(tr_total[i])):
table_total[i].append(parse_table(tr_total[i][j]))
table_result = [[[] for j in range(len(table_total[0]))] for i in range(len(table_total))] #列表生成式,生成对应文件的表格数据结构
for i in range(len(table_total)):
for j in range(len(table_total[i])):
if table_total[i][j][0] == []:
table_result[i][j] = series_create(table_total[i][j])
else:
table_result[i][j] = df_create(table_total[i][j])
temp = [[[] for j in range(len(table_total[0]))] for i in range(len(table_total))]
for i in range(len(table_result)):
for j in range(len(table_result[i])):
if isinstance(table_result[i][j], pd.Series):
table_result[i][j] = table_result[i][j].to_frame()
temp[i][j] = table_result[i][j]
else:
temp[i][j] = table_result[i][j]
table_result = temp
#print(table_result)
#print(len(table_result))
'''
第1张表中选取指标DB ID,Instance,Host(保留Host字段)
'''
DB_ID = [] #DB_ID元素类型为str
for i in range(len(table_result)):
DB_ID.append(table_result[i][0]["DB Id"][0])
print(DB_ID)
Instance = [] #Instance元素类型为str
for i in range(len(table_result)):
Instance.append(table_result[i][0]["Instance"][0])
print(Instance)
'''
第3张表中选取指标Elapsed,DB Time
'''
Elapsed = [] #Elapsed元素类型为float,单位为(mins)
for i in range(len(table_result)):
Elapsed.append(float(table_result[i][2]["Snap Time"]["Elapsed:"].split("(mins)")[0]))
print(Elapsed)
DB_Time = [] #DB_Time元素类型为float,单位为(mins)
for i in range(len(table_result)):
DB_Time.append(float(table_result[i][2]["Snap Time"]["DB Time:"].split("(mins)")[0]))
print(DB_Time)
'''
第4张表"Load Profile"选取指标 Redo Size,Logical reads,Block changes,Physical reads,Physical writes,User calls,Parses,Hard parses,Sorts,Logons,Executes
'''
Redo_Size = []
for i in range(len(table_result)):
Redo_Size.append(float(table_result[i][3]["Per Transaction"]["Redo size (bytes):"].replace(",", '')))
print(Redo_Size)
Logical_Reads = []
for i in range(len(table_result)):
Logical_Reads.append(float(table_result[i][3]["Per Transaction"]["Logical read (blocks):"].replace(",", '')))
print(Logical_Reads)
Block_Changes = []
for i in range(len(table_result)):
Block_Changes.append(float(table_result[i][3]["Per Transaction"]["Block changes:"].replace(",", '')))
Physical_Reads = []
for i in range(len(table_result)):
Physical_Reads.append(float(table_result[i][3]["Per Transaction"]["Physical read (blocks):"].replace(",", '')))
Physical_Writes = []
for i in range(len(table_result)):
Physical_Writes.append(float(table_result[i][3]["Per Transaction"]["Physical write (blocks):"].replace(",", '')))
User_Calls = []
for i in range(len(table_result)):
User_Calls.append(float(table_result[i][3]["Per Transaction"]["User calls:"].replace(",", '')))
Parses = []
for i in range(len(table_result)):
Parses.append(float(table_result[i][3]["Per Transaction"]["Parses (SQL):"].replace(",", '')))
Hard_Parses = []
for i in range(len(table_result)):
Hard_Parses.append(float(table_result[i][3]["Per Transaction"]["Hard parses (SQL):"].replace(",", '')))
Logons = []
for i in range(len(table_result)):
Logons.append(float(table_result[i][3]["Per Transaction"]["Logons:"].replace(",", '')))
Executes = []
for i in range(len(table_result)):
Executes.append(float(table_result[i][3]["Per Transaction"]["Executes (SQL):"].replace(",", '')))
'''
第5张表中选取指标Buffer Hit %
"Instance Efficiency Percentages (Target 100%)"
'''
Buffer_Hit = []
for i in range(len(table_result)):
Buffer_Hit.append(float(table_result[i][4][0]["Buffer Hit %:"]))
'''
第6张表中选取指标DB CPU
"Instance Efficiency Percentages (Target 100%)"
'''
DB_CPU = []
for i in range(len(table_result)):
DB_CPU.append(float(table_result[i][5]["% DB time"]["DB CPU"]))
print(DB_CPU)
复制代码
程序开发实践步骤:
一:在虚拟机中oracle系统中生成的oracle的awr报告:
生成AWR报告:
#sqlplus / as sysdba
SQL> @?/rdbms/admin/awrrpt.sql;
按提示进行操作:
a.选择报告格式,默认回车即可;
b.选择快照历史,当天的输入1,表示1天的快照;
c.选择快照开始点,输入对应的“Snap Id”;
d.选择快照结束点,输入对应的“Snap Id”;
f.根据提示输入awr报告名称,此处回车按默认名称(awr_1_&开始点_&结束点.html)即可,该步骤的名称可以指定绝对路径,如:/home/oracle/awr_1_&开始点_&结束点.html;
复制代码
报告格式参见附图
第二步:对产生的awr报告,写一个python程序ora_awr_analysis.py对awr报告的指数进行分析和读取,