在一个 HTML 页面中有如下 XML 代码:
<table border="1" class="bodyTable" cellpadding="0" cellspacing="0">
<tr class="a">
<th>
Tests</th>
<th>
Errors </th>
<th>
Failures</th>
<th>
Skipped</th>
<th>
Success Rate</th>
<th>
Time</th>
</tr>
<tr class="b">
<td>
499</td>
<td>
9</td>
<td>
49</td>
<td>
4</td>
<td>
87.575%</td>
<td>
17.632</td>
</tr>
</table>
我们要获得每个 key(Tests、Failures 等)对应的 value,可以用如下代码段实现。
from xml.sax.handler import ContentHandler
from xml.sax import parse
from optparse import OptionParser
class HeadlineHandler(ContentHandler):
    """SAX handler that collects the text of every <th> and <td> element.

    Header-cell (<th>) text is appended to ``k_list`` and data-cell (<td>)
    text to ``v_list``, in document order.  Both lists are supplied by the
    caller and mutated in place; the caller reads the results from them
    once parsing has finished.
    """

    # Class-level defaults; the element callbacks shadow them per instance.
    th = False  # True while the parser is inside a <th> element
    td = False  # True while the parser is inside a <td> element

    def __init__(self, k_list, v_list):
        """Remember the output lists and create empty per-cell buffers.

        Args:
            k_list: list that receives one string per <th> element.
            v_list: list that receives one string per <td> element.
        """
        # Explicit base-class call instead of super().
        ContentHandler.__init__(self)
        self.k_list = k_list
        self.v_list = v_list
        self.data_k = []  # raw text chunks of the <th> being read
        self.data_v = []  # raw text chunks of the <td> being read

    def startElement(self, name, attrs):
        """Start capturing text when a <th> or <td> element opens."""
        if name == "th":
            self.th = True
        if name == "td":
            self.td = True

    def endElement(self, name):
        """Flush the buffered text of a closing <th> or <td> to its list."""
        if name == "th":
            # Strip once per cell, not per chunk: only the layout whitespace
            # around the cell text is removed, text inside it is untouched.
            self.k_list.append(''.join(self.data_k).strip())
            self.data_k = []
            self.th = False
        if name == "td":
            self.v_list.append(''.join(self.data_v).strip())
            self.data_v = []
            self.td = False

    def characters(self, content):
        """Buffer character data that belongs to the current <th>/<td>.

        A SAX parser may deliver the text of one element in several calls,
        so chunks are stored unmodified and only joined in endElement.
        """
        if self.th:
            self.data_k.append(content)
        if self.td:
            self.data_v.append(content)
def _format_row(cells, widths):
    """Left-align each cell to its column width and join the columns."""
    padded = [cell.ljust(width) for cell, width in zip(cells, widths)]
    return "  ".join(padded).rstrip()


def _is_zero_count(text):
    """Return True only when ``text`` parses as the integer 0."""
    try:
        return int(text) == 0
    except ValueError:
        return False


def parse_xml(xml_path):
    """Print the summary table found in a report and tell whether it is clean.

    The document is parsed with ``HeadlineHandler``, which gathers the text
    of all <th> cells (headers) and all <td> cells (values).  Headers and
    values are printed as an aligned table between two separator lines.

    Args:
        xml_path: path of the XML/XHTML file to parse (anything accepted by
            ``xml.sax.parse``).

    Returns:
        True when the second and third values (the "Errors" and "Failures"
        columns in the expected report layout) are both the number 0.
        False otherwise, including when fewer than three values were found
        or a count is not a valid integer.
    """
    k_list = []
    v_list = []
    parse(xml_path, HeadlineHandler(k_list, v_list))

    # Values arrive as one flat list; cut it into rows as wide as the header
    # so that a table with several data rows is still printed correctly.
    column_count = len(k_list) or len(v_list) or 1
    rows = [v_list[start:start + column_count]
            for start in range(0, len(v_list), column_count)]

    # Each column is as wide as its longest header or value.
    widths = [0] * column_count
    for row in [k_list] + rows:
        for column, cell in enumerate(row):
            widths[column] = max(widths[column], len(cell))

    # Single-argument print(...) calls behave the same on Python 2 and 3.
    print("---------------------------------------------------------------")
    print(_format_row(k_list, widths))
    print("")
    for row in rows:
        print(_format_row(row, widths))
    print("")
    print("---------------------------------------------------------------")

    if len(v_list) < 3:
        # Without the error/failure counts the run cannot be confirmed clean.
        return False
    # Cell values are text, so they must be converted before comparing to 0;
    # comparing the raw string with 0 is always unequal.
    return _is_zero_count(v_list[1]) and _is_zero_count(v_list[2])