python xml实例_Python3 XML操作实例

最新推荐文章于 2023-05-22 09:26:15 发布

weixin_39861882

最新推荐文章于 2023-05-22 09:26:15 发布

阅读量121

点赞数

文章标签： python xml实例

0.xml基础

每一个element可以看成：

<tag attributes>text</tag>tail

tag，即标签，用于标识该元素

attributes，即属性，元素具有的属性，可以为空

text，元素包含的文本。可以为字符串、子元素的组合

tail，尾字符串，用于控制输出文件时的格式，一般为：\n\t或者\n，但希望输出缩进格式时，可以将其赋值。

<rank updated="yes">2</rank>
	<year>2008</year>

解析为：

tag1:rank

attributes:updated

text:2

tail:\n\t

tag2:year

attributes:

text:2008

tail:\n\t

1.countries.xml文件

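<?xml version="1.0"?>
<data>
	<country name="Liechtenstein">
		<rank>1</rank>
		<year>2008</year>
		<gdppc>141100</gdppc>
		<neighbor name="Austria" direction="E"/>
		<neighbor name="Switzerland" direction="W"/>
	</country>
	<country name="Singapore">
		<rank>4</rank>
		<year>2011</year>
		<gdppc>59900</gdppc>
		<neighbor name="Malaysia" direction="N"/>
	</country>
	<country name="Panama">
		<rank>68</rank>
		<year>2011</year>
		<gdppc>13600</gdppc>
		<neighbor name="Costa Rica" direction="W"/>
		<neighbor name="Colombia" direction="E"/>
	</country>
</data>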

2.python代码

使用python标准库xml中的etree处理。

2.1 查找和遍历

查找可以使用XPath函数族find*来做。

# Section 2.1 demo: walk every <country> element, then select countries by XPath.
# ET.parse() returns an ElementTree wrapping the whole document.
root = ET.parse('countries.xml')

# getiterator() was removed in Python 3.9; iter() is the supported replacement.
country_node = root.iter('country')
for node in country_node:
    print("=" * 30)
    if 'name' in node.attrib:
        print(node.tag, '=> name: ', node.attrib['name'])
    else:
        print(node.tag)
    print("-" * 30)
    # getchildren() was removed in Python 3.9; an Element is directly
    # iterable over its children, so list(node) yields the same sequence.
    country_node_children = list(node)
    for country_node_child in country_node_children:
        print(country_node_child.tag, '=> ', country_node_child.text)

print("~" * 30)
print("Find all countries which have neighbors on the west as ['direction']=='W'")
# Match <neighbor direction="W"> and step back up ("..") to the owning <country>.
countries = root.findall("./country/neighbor[@direction='W']/..")
for country in countries:
    print("-" * 30)
    if 'name' in country.attrib:
        print(country.tag, '=> name: ', country.attrib['name'])
    else:
        print(country.tag)
    print("-" * 30)
    country_infos = list(country)
    for country_info in country_infos:
        # Print only the parts (text / attributes) that the child actually has.
        if country_info.attrib and country_info.text:
            print(country_info.tag, '=> ', country_info.text, country_info.attrib)
        elif country_info.attrib:
            print(country_info.tag, '=> ', country_info.attrib)
        elif country_info.text:
            print(country_info.tag, '=> ', country_info.text)
        else:
            print(country_info.tag)

2.2 删除

使用要删除节点的父节点来删除。不能用iterator来删除，iterator只是删除迭代子中的引用。

# Section 2.2 demo: delete every <country> that has a neighbor to the west.
print("~" * 30)
print("Remove all countries has neighbor on west as ['direction']=='W'")

# Select the matching <neighbor> elements, then step up to their parent <country>.
west_neighbor_xpath = "./country/neighbor[@direction='W']/.."
find_countries = tree.findall(west_neighbor_xpath)

for country in find_countries:
    print("Remove...")
    show_country(country)
    # Removal must go through the parent element (the root here); dropping an
    # item from an iterator/list of results would not detach it from the tree.
    root_data.remove(country)

2.3 添加

将节点加入到父节点来实现节点加入。

# Section 2.3 demo: build a new <country> element and attach it to the root.
print("~" * 30)
print("Add one country...")

one_country = ET.Element("country", {"name": "Panama"})

# (tag, attributes, text) for each child, in document order.
# A text of None leaves the child without text, which is the SubElement default.
child_specs = (
    ("rank", {"updated": "yes"}, "69"),
    ("year", {}, "2011"),
    ("gdppc", {}, "13600"),
    ("neighbor", {"name": "Costa Rica", "direction": "W"}, None),
    ("neighbor", {"name": "Colombia", "direction": "E"}, None),
)
for child_tag, child_attrib, child_text in child_specs:
    child = ET.SubElement(one_country, child_tag, child_attrib)
    if child_text is not None:
        child.text = child_text

# Show the new subtree on stdout, then add it under the document root.
ET.dump(one_country)
root_data.append(one_country)

2.4 输出到文件

输入文件已经Beautify过了，所以读取后每一个元素的tail都包含控制符\n\t或者\n+空格。新加入节点的tail为空，默认输出时不会Beautify处理。

2.4.1 程序控制每一个元素的缩进，将每一个元素的tail设置为合适的空格或者tab数量。代码如下：

# Section 2.4.1 demo: build a new <country> and hand-set text/tail so the
# written file stays indented like the already-formatted input.
print("Add one country...")

one_country = ET.Element("country", {"name": "Panama"})
one_country.tail = "\n"
# The parent's text is the whitespace before its first child. It must be
# whitespace only: the previous value "asdasd\n\t\t" leaked stray text into
# the output document.
one_country.text = "\n\t\t"

one_country_rank = ET.SubElement(one_country, "rank", {"updated": "yes"})
one_country_rank.text = "69"
one_country_rank.tail = "\n\t\t"

one_country_year = ET.SubElement(one_country, "year")
one_country_year.text = "2011"
one_country_year.tail = "\n\t\t"

one_country_gdppc = ET.SubElement(one_country, "gdppc")
one_country_gdppc.text = "13600"
one_country_gdppc.tail = "\n\t\t"

one_country_neighbor = ET.SubElement(one_country, "neighbor", {"name": "Costa Rica", "direction": "W"})
one_country_neighbor.tail = "\n\t\t"

one_country_neighbor = ET.SubElement(one_country, "neighbor", {"name": "Colombia", "direction": "E"})
# Last child: one tab less, so the closing </country> tag lines up with its opener.
one_country_neighbor.tail = "\n\t"

ET.dump(one_country)
root_data.append(one_country)

# Same arguments as before, spelled as keywords so their meaning is visible.
ET.ElementTree(root_data).write(
    "new_countries.xml",
    encoding=None,
    xml_declaration=True,
    default_namespace=None,
    method="xml",
)

2.4.2 首先遍历所有元素，去掉text和tail首尾的空白字符（即将tail设置为“”），然后在输出时用minidom的toprettyxml统一控制缩进。

代码如下：

def strip_all_tail(root_node):
    """Strip leading/trailing whitespace from text and tail across a subtree.

    Walks ``root_node`` and every descendant, replacing each non-empty
    ``text`` and ``tail`` with its ``str.strip()`` result, so that
    whitespace-only formatting strings become ``""``. The tree is modified
    in place.

    Args:
        root_node: The element whose subtree is cleaned, or ``None``
            (in which case nothing happens).

    Returns:
        None.
    """
    # Compare with None explicitly: an Element with no children is falsy,
    # so a plain truthiness test would skip childless elements.
    if root_node is None:
        return

    # iter() yields root_node itself followed by all of its descendants.
    for node in root_node.iter():
        if node.text:
            node.text = node.text.strip()
        if node.tail:
            node.tail = node.tail.strip()

# Section 2.4.2 demo: drop existing formatting whitespace, then let minidom
# re-indent the whole document on output.
import xml.dom.minidom as md

# Strip BEFORE serialising. Stripping after ET.tostring() has no effect on
# the string that gets written, and the leftover whitespace nodes would be
# indented again by toprettyxml().
strip_all_tail(root_data)

xmlstr = ET.tostring(root_data).decode()
newxml = md.parseString(xmlstr)

# toprettyxml() without an encoding argument returns str, so choose the file
# encoding explicitly instead of relying on the platform default.
with open('new_countries.xml', 'w', encoding='utf-8') as outfile:
    outfile.write(newxml.toprettyxml(indent='\t', newl='\n'))

2.5 完整代码

try:

import xml.etree.cElementTree as ET

except ImportError:

import xml.etree.ElementTree as ET

def show_country(acountry):
    """Print one element and its direct children to stdout.

    Prints a ``=`` rule, then the element's tag (with its ``name``
    attribute when present), a ``-`` rule, and one line per direct child
    showing the child's tag plus whichever of text and attributes it has.

    Args:
        acountry: The ``Element`` to display.

    Returns:
        None.
    """
    print("=" * 30)
    if 'name' in acountry.attrib:
        print(acountry.tag, '=> name: ', acountry.attrib['name'])
    else:
        print(acountry.tag)
    print("-" * 30)

    # Element.getchildren() was removed in Python 3.9; iterating the element
    # itself yields its direct children in document order.
    for _country_info in acountry:
        if _country_info.attrib and _country_info.text:
            print(_country_info.tag, '=> ', _country_info.text, _country_info.attrib)
        elif _country_info.attrib:
            print(_country_info.tag, '=> ', _country_info.attrib)
        elif _country_info.text:
            print(_country_info.tag, '=> ', _country_info.text)
        else:
            print(_country_info.tag)

def show_countries(country_list):
    """Display every element of ``country_list`` via ``show_country``.

    Args:
        country_list: An iterable of elements, consumed once in order.

    Returns:
        None.
    """
    entries = iter(country_list)
    for entry in entries:
        show_country(entry)

# Full demo: load countries.xml, list, query, remove and add <country> elements.
tree = ET.parse('countries.xml')
root_data = tree.getroot()  # the document's root element

# ElementTree.getiterator() was removed in Python 3.9; iter() replaces it.
all_countries = tree.iter('country')
show_countries(all_countries)

print("~" * 30)
print("Find all countries has neighbor on west as ['direction']=='W'")
# Match <neighbor direction="W"> and step back up ("..") to the owning <country>.
find_countries = tree.findall("./country/neighbor[@direction='W']/..")
show_countries(find_countries)

print("~" * 30)
print("Remove all countries has neighbor on west as ['direction']=='W'")
find_countries = tree.findall("./country/neighbor[@direction='W']/..")
for country in find_countries:
    print("Remove...")
    show_country(country)
    # Remove through the parent element; discarding an item from an iterator
    # of results would not detach it from the tree.
    root_data.remove(country)

print("~" * 30)
print("Remain countries...")
countries = tree.iter('country')
show_countries(countries)

print("~" * 30)
print("Add one country...")
one_country = ET.Element("country", {"name": "Panama"})

one_country_rank = ET.SubElement(one_country, "rank", {"updated": "yes"})
one_country_rank.text = "69"

one_country_year = ET.SubElement(one_country, "year")
one_country_year.text = "2011"

one_country_gdppc = ET.SubElement(one_country, "gdppc")
one_country_gdppc.text = "13600"

# The two neighbors carry attributes only, so their handles are not kept.
ET.SubElement(one_country, "neighbor", {"name": "Costa Rica", "direction": "W"})
ET.SubElement(one_country, "neighbor", {"name": "Colombia", "direction": "E"})

ET.dump(one_country)
root_data.append(one_country)

# Re-iterate to show the tree including the newly appended country.
countries = tree.iter('country')
show_countries(countries)

weixin_39861882

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
复制链接

分享到 QQ

分享到新浪微博

扫一扫