**
XML 是一种十分常见的标记性语言,可提供统一的方法来描述应用程序的结构化数据。
**
[code lang="xml"]
<?xml version="1.0"?>
<data>
<country name="Liechtenstein">
<rank>1</rank>
<year>2008</year>
<gdppc>141100</gdppc>
<neighbor name="Austria" direction="E"/>
<neighbor name="Switzerland" direction="W"/>
</country>
<country name="Singapore">
<rank>4</rank>
<year>2011</year>
<gdppc>59900</gdppc>
<neighbor name="Malaysia" direction="N"/>
</country>
<country name="Panama">
<rank>68</rank>
<year>2011</year>
<gdppc>13600</gdppc>
<neighbor name="Costa Rica" direction="W"/>
<neighbor name="Colombia" direction="E"/>
</country>
</data>
[/code]
我们可以使用标准库中的xml.etree.ElementTree,其中的parse函数可以解析xml文档。
from xml.etree.ElementTree import parse
#导入这个函数
parse 这个函数有两个参数:parse(source, parser=None)
1
2
3
4
5
6
7
8
9
10
11
|
In [28]: parse?
Signature: parse(source, parser=None)
Docstring:
Parse XML document into element tree.

*source* is a filename or file object containing XML data,
*parser* is an optional parser instance defaulting to XMLParser.

Return an ElementTree instance.
File:      /usr/local/Cellar/python3/3.6.2/Frameworks/Python.framework/Versions/3.6/lib/python3.6/xml/etree/ElementTree.py
Type:      function
|
可以把上面这个 xml 文件作为 source,也就是输入源。
1
2
3
4
5
6
7
|
In [31]: from xml.etree.ElementTree import fromstring, parse

In [32]: f = open('01.xml')

In [33]: et = parse(f)

In [34]: root = et.getroot()
|
1
2
3
4
|
# 获取根节点
In [35]: root
Out[35]: <Element 'data' at 0x107949818>
|
1
2
3
|
In [36]: root.tag
Out[36]: 'data'
# 获取标签
|
1
2
3
|
# 获取属性
In [37]: root.attrib
Out[37]: {}
|
1
2
3
4
5
6
|
# 获取文本
In [38]: root.text
Out[38]: '\n '

In [39]: root.text.strip()
Out[39]: ''
|
1
2
3
4
5
6
|
# 获取子节点(注意:getchildren() 自 Python 3.2 起已弃用,并在 Python 3.9 中被移除,新代码请使用 list(root) 或直接遍历 root)
In [40]: root.getchildren()
Out[40]:
[<Element 'country' at 0x107d76778>,
 <Element 'country' at 0x107d3e188>,
 <Element 'country' at 0x107d3e0e8>]
|
1
2
3
4
5
6
7
8
|
# 获取子节点的 name 属性
In [43]: for x in root:
    ...:     print(x.get('name'))
    ...:
    ...:
Liechtenstein
Singapore
Panama
|
1
2
3
4
|
# 获取 root 节点下的 country(第一个匹配的节点)
In [44]: root.find('country')
Out[44]: <Element 'country' at 0x107d76778>
|
1
2
3
4
5
6
7
|
# 获取 root 节点下所有的 country
In [45]: root.findall('country')
Out[45]:
[<Element 'country' at 0x107d76778>,
 <Element 'country' at 0x107d3e188>,
 <Element 'country' at 0x107d3e0e8>]
|
1
2
3
4
5
6
7
8
9
10
|
# 获取 root 节点下所有的 country(返回生成器)
In [46]: root.iterfind('country')
Out[46]: <generator object prepare_child.<locals>.select at 0x107147b48>

In [47]: for e in root.iterfind('country'):
    ...:     print(e.get('name'))
    ...:
Liechtenstein
Singapore
Panama
|
1
2
3
4
5
6
7
8
|
# findall 只匹配直接子节点,不能获取孙子节点
In [48]: root.findall('rank')
Out[48]: []

In [49]: root.iter()
Out[49]: <_elementtree._element_iterator at 0x107d530a0>
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
|
# 获取所有的节点
In [50]: [x for x in root.iter()]
Out[50]:
[<Element 'data' at 0x107949818>,
 <Element 'country' at 0x107d76778>,
 <Element 'rank' at 0x107d76818>,
 <Element 'year' at 0x107d76728>,
 <Element 'gdppc' at 0x107d76598>,
 <Element 'neighbor' at 0x10784f9f8>,
 <Element 'neighbor' at 0x10784f8b8>,
 <Element 'country' at 0x107d3e188>,
 <Element 'rank' at 0x107d3e908>,
 <Element 'year' at 0x107d3e548>,
 <Element 'gdppc' at 0x107d3ec78>,
 <Element 'neighbor' at 0x107d3e638>,
 <Element 'country' at 0x107d3e0e8>,
 <Element 'rank' at 0x107d3ed18>,
 <Element 'year' at 0x107d3ea48>,
 <Element 'gdppc' at 0x107d3e368>,
 <Element 'neighbor' at 0x107d3e9a8>,
 <Element 'neighbor' at 0x107d3eb88>]
|
1
2
3
4
5
6
|
# 获取标签是 rank 的所有节点(iter 会递归遍历整棵子树,因此包括孙子节点中的 rank)
In [51]: [x for x in root.iter('rank')]
Out[51]:
[<Element 'rank' at 0x107d76818>,
 <Element 'rank' at 0x107d3e908>,
 <Element 'rank' at 0x107d3ed18>]
|
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
|
# 查找 country 下所有的子元素
In [52]: root.findall('country/*')
Out[52]:
[<Element 'rank' at 0x107d76818>,
 <Element 'year' at 0x107d76728>,
 <Element 'gdppc' at 0x107d76598>,
 <Element 'neighbor' at 0x10784f9f8>,
 <Element 'neighbor' at 0x10784f8b8>,
 <Element 'rank' at 0x107d3e908>,
 <Element 'year' at 0x107d3e548>,
 <Element 'gdppc' at 0x107d3ec78>,
 <Element 'neighbor' at 0x107d3e638>,
 <Element 'rank' at 0x107d3ed18>,
 <Element 'year' at 0x107d3ea48>,
 <Element 'gdppc' at 0x107d3e368>,
 <Element 'neighbor' at 0x107d3e9a8>,
 <Element 'neighbor' at 0x107d3eb88>]
|
1
2
3
4
5
6
|
# 选中当前元素下所有层级的 rank 节点
In [54]: root.findall('.//rank')
Out[54]:
[<Element 'rank' at 0x107d76818>,
 <Element 'rank' at 0x107d3e908>,
 <Element 'rank' at 0x107d3ed18>]
|
1
2
3
4
5
6
7
|
# 选择包含 name 属性的 country 节点
In [58]: root.findall('country[@name]')
Out[58]:
[<Element 'country' at 0x107d76778>,
 <Element 'country' at 0x107d3e188>,
 <Element 'country' at 0x107d3e0e8>]
|
1
2
3
4
5
|
# 选择 name 属性值为 "Liechtenstein" 的 country 节点
In [59]: root.findall('country[@name="Liechtenstein"]')
Out[59]: [<Element 'country' at 0x107d76778>]
|
1
2
3
4
5
6
7
|
# 选择包含 rank 子节点的 country 节点
In [60]: root.findall('country[rank]')
Out[60]:
[<Element 'country' at 0x107d76778>,
 <Element 'country' at 0x107d3e188>,
 <Element 'country' at 0x107d3e0e8>]
|
1
2
3
4
|
# 选择包含 rank 子节点、且该 rank 节点的文本为 4 的 country 节点
In [63]: root.findall('country[rank="4"]')
Out[63]: [<Element 'country' at 0x107d3e188>]
|