python爬取指定数据_【Python】实现从AWR 报表上抓取指定数据

因为写数据库性能周报时需要查找并计算AWR报表上的一些关键指标的值,而每次手工收集数据都要花很长时间,所以写了一个Python工具来抓取想要的值并完成计算。(目前还不太完善,以后会更贴近写周报的需求)

import sys

import urllib

import HTMLParser

import string

# Module-level totals for each metric; all of them start at zero.
sum_Logical = sum_Physical_reads = sum_Physical_writes = 0
sum_Executes = sum_Transactions = 0

def utof(s1):
    """Convert report text such as ``u' 1,234,567.8 '`` to a float.

    Values scraped from the report are text and may contain surrounding
    whitespace and comma thousands separators, so they must be converted
    before they can be summed.

    Args:
        s1: Text (unicode or str) holding one decimal number, optionally
            grouped with commas.

    Returns:
        The numeric value as a float.

    Raises:
        ValueError: If the text is not a number once whitespace and commas
            are removed (for example an empty string).
    """
    # Dropping the separators handles any number of comma groups and keeps
    # the sign correct, which per-group arithmetic does not.
    return float(s1.strip().replace(',', ''))

##类是解析html并且从html上获取想要的数据

urltext = []

class CustomParser(HTMLParser.HTMLParser):

selected=('table', 'h1', 'font', 'ul', 'li', 'tr', 'td', 'a')

def reset(self):

HTMLParser.HTMLParser.reset(self)

self._level_stack = []

def handle_starttag(self, tag, attrs):

if tag in CustomParser.selected:

self._level_stack.append(tag)

def handle_endtag(self, tag):

if self._level_stack \

and tag in CustomParser.selected \

and tag == self._level_stack[-1]:

self._level_stack.pop()

##获取html上出去标签之后的文本数据

def handle_data(self, data):

if "/".join(self._level_stack) in ('table/tr/td','table/tr/td/h1/font','table/tr/td/ul/li') and data !='\n':

urltext.append(data)

def gethtml(url):
    """Download one report page and run it through ``CustomParser``.

    The page is requested with the module-level ``params`` as request data
    and decoded as GB2312.  Parsing has the side effect of appending the
    collected text fragments to the module-level ``urltext`` list.

    Args:
        url: Address of the report page to fetch.

    Raises:
        UnicodeDecodeError: If the page body is not valid GB2312.
    """
    response = urllib.urlopen(url, params)
    try:
        raw_page = response.read()
    finally:
        # Always release the connection, even when reading fails.
        response.close()

    content = raw_page.decode('GB2312')

    parser = CustomParser()
    parser.feed(content)
    parser.close()

# One independent, initially empty list per metric.
# NOTE(review): nothing in the visible part of the script reads or fills
# these lists -- confirm whether they are still needed.
Logical, Physical_reads, Physical_writes, Executes, Transactions = (
    [], [], [], [], []
)

def calucate(urltext):
    """Print the key metrics of one parsed report and add them to the totals.

    A metric label (for example ``'Logical reads:'``) is expected to be
    immediately followed by its value, so for the label at 1-based position
    ``k`` the value is ``urltext[k]``.  Fragments before position 50 are
    skipped, and scanning stops at the first fragment past position 86 that
    is not one of the known labels.

    Every value found is printed and, after conversion with ``utof``, added
    to the matching module-level ``sum_*`` total.

    Args:
        urltext: List of text fragments collected from one report page.
    """
    global sum_Logical, sum_Physical_reads, sum_Physical_writes
    global sum_Executes, sum_Transactions

    print('-----------------------------------------')

    fragment_count = len(urltext)
    # NOTE(review): 50 and 86 presumably bracket the section of the report
    # that holds these metrics -- confirm against the report layout.
    for k, item in enumerate(urltext, 1):
        if k < 50:
            continue
        if k >= fragment_count:
            # The last fragment has nothing after it, so even if it is a
            # label there is no value to read; stop instead of indexing
            # past the end of the list.
            break

        value = urltext[k]
        if item == 'Logical reads:':
            sum_Logical += utof(value)
            print('%s %s' % ('Logical reads:     ', value.strip()))
        elif item == 'Physical reads:':
            sum_Physical_reads += utof(value)
            print('%s %s' % ('Physical reads:    ', value.strip()))
        elif item == 'Physical writes:':
            sum_Physical_writes += utof(value)
            print('%s %s' % ('Physical writes:   ', value.strip()))
        elif item == 'Executes:':
            sum_Executes += utof(value)
            print('%s %s' % ('Executes:          ', value.strip()))
        elif item == 'Transactions:':
            sum_Transactions += utof(value)
            print('%s %s' % ('Transactions:      ', value.strip()))
        elif k > 86:
            break

# Optional request data: when an IP is given on the command line it is sent
# with every report request (gethtml reads this module-level name).
if len(sys.argv) > 1:
    params = urllib.urlencode({'ip': sys.argv[1], 'action': 2})
else:
    params = None

# Start from clean totals before any report is processed.
sum_Logical = 0
sum_Physical_reads = 0
sum_Physical_writes = 0
sum_Executes = 0
sum_Transactions = 0

# Report pages to average over.
url = [
    'http://127.0.0.1/cacti/spreport/rac3.yangql.com/sp_yangdb_20111211_10_16119_16120.html',
    'http://127.0.0.1/cacti/spreport/rac3.yangql.com/sp_yangdb_20111211_17_16126_16127.html',
    'http://127.0.0.1/cacti/spreport/rac3.yangql.com/sp_yangdb_20111210_17_16102_16103.html',
    'http://127.0.0.1/cacti/spreport/rac3.yangql.com/sp_yangdb_20111210_10_16095_16096.html',
    'http://127.0.0.1/cacti/spreport/rac3.yangql.com/sp_yangdb_20111209_17_16078_16079.html',
    'http://127.0.0.1/cacti/spreport/rac3.yangql.com/sp_yangdb_20111208_17_16054_16055.html',
    'http://127.0.0.1/cacti/spreport/rac3.yangql.com/sp_yangdb_20111209_10_16071_16072.html',
    'http://127.0.0.1/cacti/spreport/rac3.yangql.com/sp_yangdb_20111208_10_16047_16048.html',
    'http://127.0.0.1/cacti/spreport/rac3.yangql.com/sp_yangdb_20111207_17_16030_16031.html',
    'http://127.0.0.1/cacti/spreport/rac3.yangql.com/sp_yangdb_20111207_10_16023_16024.html',
    'http://127.0.0.1/cacti/spreport/rac3.yangql.com/sp_yangdb_20111206_17_16006_16007.html',
    'http://127.0.0.1/cacti/spreport/rac3.yangql.com/sp_yangdb_20111206_10_15999_16000.html',
    'http://127.0.0.1/cacti/spreport/rac3.yangql.com/sp_yangdb_20111205_17_15982_15983.html',
    'http://127.0.0.1/cacti/spreport/rac3.yangql.com/sp_yangdb_20111205_10_15975_15976.html',
]

for val in url:
    print(' ')
    gethtml(val)
    calucate(urltext)
    # Start the next report with an empty fragment list.
    urltext = []

length = len(url)

print('-----------------------------------------')
# Average of each total over the number of reports.
for label, total in (
    ('avg_Logical:       ', sum_Logical),
    ('avg_Physical_reads:', sum_Physical_reads),
    ('avg_Physical_writes', sum_Physical_writes),
    ('avg_Executes       ', sum_Executes),
    ('avg_Transactions   ', sum_Transactions),
):
    print('%s %s' % (label, total / length))

效果截图:

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值