转自 http://blog.csdn.net/yiluochenwu/article/details/23515923
XML文件内容:
- <students>
- <student name='刘备' sex='男' age='35'/>
- <student name='吕布' sex='男' age='38'/>
- <student name='貂蝉' sex='女' age='22'/>
- </students>
代码:
- #-*- coding: UTF-8 -*-
- # 从文件中读取数据
- import xml.etree.ElementTree as ET
# Globally unique identifier. walkData stamps it on each visited node and then
# increments it, so IDs keep increasing across successive calls.
unique_id = 1


def walkData(root_node, level, result_list):
    """Record root_node and all of its descendants, depth-first.

    One row ``[unique_id, level, tag, attrib]`` is appended to result_list
    for every element visited, parent before children.

    Args:
        root_node: ElementTree element to start from.
        level: Depth assigned to root_node; each child gets ``level + 1``.
        result_list: List that receives the rows (mutated in place).

    Returns:
        None. Results are delivered through result_list.
    """
    global unique_id
    result_list.append([unique_id, level, root_node.tag, root_node.attrib])
    unique_id += 1
    # Iterating an Element yields its direct children. The former
    # getchildren() call was removed in Python 3.9, and a leaf element simply
    # yields nothing, so no explicit emptiness check is needed.
    for child in root_node:
        walkData(child, level + 1, result_list)
def getXmlData(file_name):
    """Parse an XML file and return one flat row per element.

    The rows are produced by walkData, starting at the document root with
    depth 1, e.g.:

        [
            # ID, Level, Tag, Attr Map
            [1, 1, 'students', {}],
            [2, 2, 'student', {'age': '35', ...}],
        ]

    Args:
        file_name: Path of the XML file to read.

    Returns:
        The list of rows collected from the whole tree.
    """
    tree = ET.parse(file_name)
    rows = []
    # Node depth is counted from 1 at the root element.
    walkData(tree.getroot(), 1, rows)
    return rows
if __name__ == '__main__':
    # Demo entry point: dump every row extracted from the sample file.
    file_name = 'test.xml'
    R = getXmlData(file_name)
    for x in R:
        # Single-argument print(...) is valid on both Python 2 and Python 3;
        # the bare `print x` statement is a syntax error on Python 3.
        print(x)
输出结果(Python 2 下运行,Unicode 字符串以 u'...' 形式显示):
- [1, 1, 'students', {}]
- [2, 2, 'student', {'age': '35', 'name': u'\u5218\u5907', 'sex': u'\u7537'}]
- [3, 2, 'student', {'age': '38', 'name': u'\u5415\u5e03', 'sex': u'\u7537'}]
- [4, 2, 'student', {'age': '22', 'name': u'\u8c82\u8749', 'sex': u'\u5973'}]