使用 Python 解析 XML 文件的文章很多，做法确实也很简单。但是当文件中含有命名空间（xmlns）的时候，许多文章中给出的例子就无法工作了。其实解决这一问题也很简单：只需加上一行正则表达式，取出命名空间前缀就可以了！
欢迎给出意见和批评!
程序代码:
#!/usr/bin/python
import sys
import re
import xml.etree.ElementTree as ET
def parseOptions ():
    """Return the XML file name given as the first command-line argument.

    Reads ``sys.argv``. When no argument was supplied, the usage text is
    printed and the process exits with status 1 (``SystemExit``).
    """
    if len(sys.argv) < 2:
        # Parenthesised single-argument print behaves the same on
        # Python 2 and Python 3.
        print("""
USAGE:
python program.py file
""")
        sys.exit(1)
    return sys.argv[1]
def main ():
    """Print the text of every ``Id`` element inside the root's ``IdList``.

    The file name comes from ``parseOptions()``. ElementTree stores a
    namespaced tag as ``{uri}local``, so the ``{uri}`` prefix is taken from
    the root tag with one regular expression and prepended to the local
    names being looked up. A document without a namespace is handled by
    using an empty prefix.
    """
    filename = parseOptions()
    root = ET.parse(filename).getroot()

    # Anchored, non-greedy match: capture only the leading "{uri}" part.
    ns_match = re.match(r'\{[^}]*\}', root.tag)
    xmlns = ns_match.group(0) if ns_match else ''

    # findall() with a plain tag name compares the qualified tag exactly and
    # only looks at direct children, so the namespace URI is never
    # interpreted as a regex and unrelated children are not printed.
    for id_list in root.findall(xmlns + 'IdList'):
        for child in id_list.findall(xmlns + 'Id'):
            print(child.text)
# Run the program only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
XML文件:
<eSearchResult xmlns="http://www.ncbi.nlm.nih.gov/soap/eutils/esearch">
<Count>329</Count>
<RetMax>20</RetMax>
<RetStart>0</RetStart>
<IdList>
<Id>18371695</Id>
<Id>18353787</Id>
<Id>18337261</Id>
<Id>18312416</Id>
<Id>18302187</Id>
<Id>18232994</Id>
<Id>18231669</Id>
<Id>18223291</Id>
<Id>18085574</Id>
<Id>18058769</Id>
<Id>18056069</Id>
<Id>17984965</Id>
<Id>17968707</Id>
<Id>17939156</Id>
<Id>17916468</Id>
<Id>17875870</Id>
<Id>17768677</Id>
<Id>17763598</Id>
<Id>17699872</Id>
<Id>17695722</Id>
</IdList>
<TranslationSet>
</TranslationSet>
<TranslationStack>
<TermSet>
<Term>taverna[All Fields]</Term>
<Field>All Fields</Field>
<Count>329</Count>
<Explode>Y</Explode>
</TermSet>
<OP>GROUP</OP>
</TranslationStack>
<QueryTranslation>taverna[All Fields]</QueryTranslation>
</eSearchResult>
转自:http://blog.chinaunix.net/space.php?uid=20683570&do=blog&id=1573019
欢迎给出意见和批评!
程序代码:
#!/usr/bin/python
import sys
import re
import xml.etree.ElementTree as ET
def parseOptions ():
    """Return the XML file name given as the first command-line argument.

    Reads ``sys.argv``. When no argument was supplied, the usage text is
    printed and the process exits with status 1 (``SystemExit``).
    """
    if len(sys.argv) < 2:
        # Parenthesised single-argument print behaves the same on
        # Python 2 and Python 3.
        print("""
USAGE:
python program.py file
""")
        sys.exit(1)
    return sys.argv[1]
def main ():
    """Print the text of every ``Id`` element inside the root's ``IdList``.

    The file name comes from ``parseOptions()``. ElementTree stores a
    namespaced tag as ``{uri}local``, so the ``{uri}`` prefix is taken from
    the root tag with one regular expression and prepended to the local
    names being looked up. A document without a namespace is handled by
    using an empty prefix.
    """
    filename = parseOptions()
    root = ET.parse(filename).getroot()

    # Anchored, non-greedy match: capture only the leading "{uri}" part.
    ns_match = re.match(r'\{[^}]*\}', root.tag)
    xmlns = ns_match.group(0) if ns_match else ''

    # findall() with a plain tag name compares the qualified tag exactly and
    # only looks at direct children, so the namespace URI is never
    # interpreted as a regex and unrelated children are not printed.
    for id_list in root.findall(xmlns + 'IdList'):
        for child in id_list.findall(xmlns + 'Id'):
            print(child.text)
# Run the program only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
XML文件:
<eSearchResult xmlns="http://www.ncbi.nlm.nih.gov/soap/eutils/esearch">
<Count>329</Count>
<RetMax>20</RetMax>
<RetStart>0</RetStart>
<IdList>
<Id>18371695</Id>
<Id>18353787</Id>
<Id>18337261</Id>
<Id>18312416</Id>
<Id>18302187</Id>
<Id>18232994</Id>
<Id>18231669</Id>
<Id>18223291</Id>
<Id>18085574</Id>
<Id>18058769</Id>
<Id>18056069</Id>
<Id>17984965</Id>
<Id>17968707</Id>
<Id>17939156</Id>
<Id>17916468</Id>
<Id>17875870</Id>
<Id>17768677</Id>
<Id>17763598</Id>
<Id>17699872</Id>
<Id>17695722</Id>
</IdList>
<TranslationSet>
</TranslationSet>
<TranslationStack>
<TermSet>
<Term>taverna[All Fields]</Term>
<Field>All Fields</Field>
<Count>329</Count>
<Explode>Y</Explode>
</TermSet>
<OP>GROUP</OP>
</TranslationStack>
<QueryTranslation>taverna[All Fields]</QueryTranslation>
</eSearchResult>
转自:http://blog.chinaunix.net/space.php?uid=20683570&do=blog&id=1573019