python xml实例_Python3 XML操作实例

0.xml基础

每一个element可以看成:

<tag attributes>text</tag>tail

tag,即标签,用于标识该元素

attributes,即属性,元素具有的属性,可以为空

text,元素包含的文本。可以为字符串、子元素的组合

tail,尾字符串,即该元素结束标签之后、下一个标签之前的文本。解析已排版的文件时,它通常只是用于排版的空白(如\n\t或者\n);希望输出带缩进的格式时,可以手动为其赋值。

<rank updated="yes">2</rank>

<year>2008</year>

解析为:

tag1:rank

attributes:updated="yes"

text:2

tail:\n\t

tag2:year

attributes:

text:2008

tail:\n\t

1.countries.xml文件

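<?xml version="1.0"?>
<data>
	<country name="Liechtenstein">
		<rank updated="yes">2</rank>
		<year>2008</year>
		<gdppc>141100</gdppc>
		<neighbor name="Austria" direction="E"/>
		<neighbor name="Switzerland" direction="W"/>
	</country>
	<country name="Singapore">
		<rank updated="yes">5</rank>
		<year>2011</year>
		<gdppc>59900</gdppc>
		<neighbor name="Malaysia" direction="N"/>
	</country>
	<country name="Panama">
		<rank updated="yes">69</rank>
		<year>2011</year>
		<gdppc>13600</gdppc>
		<neighbor name="Costa Rica" direction="W"/>
		<neighbor name="Colombia" direction="E"/>
	</country>
</data>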

2.python代码

使用python标准库xml中的etree处理。

2.1 查找和遍历

查找可以使用XPath函数族find*来做。

import xml.etree.ElementTree as ET

# NOTE: ET.parse() returns an ElementTree wrapper, not the root Element.
# Both expose iter() and findall(), which is all this snippet needs.
root = ET.parse('countries.xml')

# iter() replaces getiterator(), which was removed in Python 3.9.
country_node = root.iter('country')
for node in country_node:
    print("=" * 30)
    if 'name' in node.attrib:
        print(node.tag, '=> name: ', node.attrib['name'])
    else:
        print(node.tag)
    print("-" * 30)
    # Iterating an Element yields its direct children; this replaces
    # getchildren(), which was removed in Python 3.9.
    for country_node_child in node:
        print(country_node_child.tag, '=> ', country_node_child.text)

print("~" * 30)
print("Find all countries which have neighbors on the west as ['direction']=='W'")

# XPath: match each <neighbor direction="W"> below a <country>, then step back
# up to its parent with "..", so the result is the list of <country> elements.
countries = root.findall("./country/neighbor[@direction='W']/..")
for country in countries:
    print("-" * 30)
    if 'name' in country.attrib:
        print(country.tag, '=> name: ', country.attrib['name'])
    else:
        print(country.tag)
    print("-" * 30)
    for country_info in country:
        # Print whichever of text / attributes the child actually carries.
        if country_info.attrib and country_info.text:
            print(country_info.tag, '=> ', country_info.text, country_info.attrib)
        elif country_info.attrib:
            print(country_info.tag, '=> ', country_info.attrib)
        elif country_info.text:
            print(country_info.tag, '=> ', country_info.text)
        else:
            print(country_info.tag)

2.2 删除

通过要删除节点的父节点调用remove()来删除。不能在迭代器(iterator)返回的结果上删除,那样只是移除了迭代结果中的引用,节点本身仍然留在ElementTree中。

banner = "~" * 30
print(banner)
print("Remove all countries has neighbor on west as ['direction']=='W'")

# Select every <country> that owns a <neighbor direction="W"> child.
west_xpath = "./country/neighbor[@direction='W']/.."
find_countries = tree.findall(west_xpath)
for country in find_countries:
    print("Remove...")
    show_country(country)
    # Removal has to go through the parent element (the root here). Dropping
    # the item from an iteration result would only discard that reference and
    # leave the element inside the tree.
    root_data.remove(country)

2.3 添加

将节点加入到父节点来实现节点加入。

print("~" * 30)
print("Add one country...")

# Build the new <country> element bottom-up, then attach it to the root.
one_country = ET.Element("country", {"name": "Panama"})

# Text-carrying children in document order: (tag, attributes, text).
for tag, attrs, text in (
    ("rank", {"updated": "yes"}, "69"),
    ("year", {}, "2011"),
    ("gdppc", {}, "13600"),
):
    ET.SubElement(one_country, tag, attrs).text = text

# Attribute-only <neighbor> children.
for neighbor_attrs in (
    {"name": "Costa Rica", "direction": "W"},
    {"name": "Colombia", "direction": "E"},
):
    ET.SubElement(one_country, "neighbor", neighbor_attrs)

# Show the freshly built element on stdout before adding it to the tree.
ET.dump(one_country)
root_data.append(one_country)

2.4 输出到文件

输入文件已经Beautify过了,所以读取后每一个元素的tail都包含控制符\n\t或者\n+空格。新加入节点的tail为空,默认输出时不会Beautify处理。

2.4.1 程序控制每一个元素的缩进,将每一个元素的tail设置为合适的空格或者tab数量。代码如下:

print("Add one country...")

one_country = ET.Element("country", {"name": "Panama"})
# text is what sits between <country> and its first child: only the line break
# and indentation for that child. (The original had leftover debug text here,
# which would have been written into the file as real character data.)
one_country.text = "\n\t\t"
# tail is what follows </country>: a line break before the root's closing tag.
one_country.tail = "\n"

one_country_rank = ET.SubElement(one_country, "rank", {"updated": "yes"})
one_country_rank.text = "69"
one_country_rank.tail = "\n\t\t"

one_country_year = ET.SubElement(one_country, "year")
one_country_year.text = "2011"
one_country_year.tail = "\n\t\t"

one_country_gdppc = ET.SubElement(one_country, "gdppc")
one_country_gdppc.text = "13600"
one_country_gdppc.tail = "\n\t\t"

one_country_neighbor = ET.SubElement(one_country, "neighbor", {"name": "Costa Rica", "direction": "W"})
one_country_neighbor.tail = "\n\t\t"

one_country_neighbor = ET.SubElement(one_country, "neighbor", {"name": "Colombia", "direction": "E"})
# The last child's tail uses one tab less so that </country> lines up with
# its opening tag.
one_country_neighbor.tail = "\n\t"

ET.dump(one_country)
root_data.append(one_country)

# Same arguments as the original positional call (encoding and
# default_namespace left at their defaults), spelled out as keywords.
ET.ElementTree(root_data).write("new_countries.xml", xml_declaration=True, method="xml")

2.4.2 首先遍历所有元素,去掉text和tail首尾的空白字符,然后在输出时由minidom的toprettyxml统一控制缩进。

代码如下:

def strip_all_tail(root_node):
    """Strip surrounding whitespace from the text and tail of a whole subtree.

    The element itself and every descendant are modified in place, which
    removes the indentation whitespace a pretty-printed input file leaves
    behind.

    Args:
        root_node: The element at the top of the subtree, or None (no-op).

    Returns:
        None.
    """
    # Compare against None explicitly: an Element with no children is falsy,
    # so a plain truth test would skip leaf elements entirely.
    if root_node is None:
        return

    # iter() yields root_node first and then all descendants in document order.
    for node in root_node.iter():
        if node.text:
            node.text = node.text.strip()
        if node.tail:
            node.tail = node.tail.strip()

import xml.dom.minidom as md

# Strip the old layout whitespace BEFORE serialising. Doing it afterwards has
# no effect on the output, and toprettyxml() would then keep the old
# whitespace-only text nodes and add its own indentation on top of them.
strip_all_tail(root_data)

# encoding="unicode" makes tostring() return a str directly.
xmlstr = ET.tostring(root_data, encoding="unicode")
newxml = md.parseString(xmlstr)

# toprettyxml() emits a declaration without an encoding, which XML readers
# treat as UTF-8, so write the file as UTF-8 instead of the locale default.
with open('new_countries.xml', 'w', encoding='utf-8') as outfile:
    outfile.write(newxml.toprettyxml(indent='\t', newl='\n'))

2.5 完整代码

try:

import xml.etree.cElementTree as ET

except ImportError:

import xml.etree.ElementTree as ET

def show_country(acountry):
    """Print a summary of one element and its direct children to stdout.

    A header line shows the element's tag (plus its ``name`` attribute when
    present), followed by one line per child with whichever of text and
    attributes that child has.

    Args:
        acountry: The element to display.
    """
    print("=" * 30)
    if 'name' in acountry.attrib:
        print(acountry.tag, '=> name: ', acountry.attrib['name'])
    else:
        print(acountry.tag)
    print("-" * 30)

    # Iterating an Element yields its direct children; this replaces
    # getchildren(), which was removed in Python 3.9.
    for _country_info in acountry:
        if _country_info.attrib and _country_info.text:
            print(_country_info.tag, '=> ', _country_info.text, _country_info.attrib)
        elif _country_info.attrib:
            print(_country_info.tag, '=> ', _country_info.attrib)
        elif _country_info.text:
            print(_country_info.tag, '=> ', _country_info.text)
        else:
            print(_country_info.tag)

def show_countries(country_list):
    """Display each element of country_list, in order, via show_country()."""
    for entry in country_list:
        show_country(entry)

tree = ET.parse('countries.xml')
# Root Element of the parsed document; countries are removed from and
# appended to it below.
root_data = tree.getroot()

# iter() replaces getiterator(), which was removed in Python 3.9.
all_countries = tree.iter('country')
show_countries(all_countries)

print("~" * 30)
print("Find all countries has neighbor on west as ['direction']=='W'")
# XPath: match each <neighbor direction="W"> below a <country>, then step back
# up to its parent with "..".
find_countries = tree.findall("./country/neighbor[@direction='W']/..")
show_countries(find_countries)

print("~" * 30)
print("Remove all countries has neighbor on west as ['direction']=='W'")
find_countries = tree.findall("./country/neighbor[@direction='W']/..")
for country in find_countries:
    print("Remove...")
    show_country(country)
    # Remove through the parent element. Dropping the item from an iteration
    # result (such as all_countries) would only discard that reference and
    # leave the element in the tree.
    root_data.remove(country)

print("~" * 30)
print("Remain countries...")
countries = tree.iter('country')
show_countries(countries)

print("~" * 30)
print("Add one country...")
one_country = ET.Element("country", {"name": "Panama"})
one_country_rank = ET.SubElement(one_country, "rank", {"updated": "yes"})
one_country_rank.text = "69"
one_country_year = ET.SubElement(one_country, "year")
one_country_year.text = "2011"
one_country_gdppc = ET.SubElement(one_country, "gdppc")
one_country_gdppc.text = "13600"
one_country_neighbor = ET.SubElement(one_country, "neighbor", {"name": "Costa Rica", "direction": "W"})
one_country_neighbor = ET.SubElement(one_country, "neighbor", {"name": "Colombia", "direction": "E"})
# Show the new element on stdout, then attach it to the root.
ET.dump(one_country)
root_data.append(one_country)

countries = tree.iter('country')
show_countries(countries)

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值