第三方模块安装:pip install lxml
获取文本内容用:text()
取后代的标签用://
通配符,代表任意标签:/*
from lxml import etree
# Sample document shared by all queries below. <nick> elements occur at
# several depths: directly under <book>, directly under <author>, one level
# deeper inside <dv> and <span>, and under <parent>.
xml = '''
<book>
<id>1</id>
<name>中秋节月饼</name>
<price>9.8元</price>
<nick>博饼</nick>
<author>
<nick id="1">嫦娥</nick>
<nick id="2">月兔</nick>
<nick id="3">广寒宫</nick>
<dv>
<nick id="4">天蓬元帅</nick>
</dv>
<span>
<nick id="5">王刚</nick>
</span>
</author>
<parent>
<nick id="5">后裔</nick>
</parent>
</book>
'''

# Parse the XML string; the result is the element the XPath queries run on.
tree = etree.XML(xml)

# Every expression ends in text(), so each query yields a list of strings
# (the text content) rather than a list of elements.
_nick_queries = (
    # <nick> that is a direct child of <book>  -> ['博饼']
    '/book/nick/text()',
    # <nick> elements that are direct children of <author>
    # -> ['嫦娥', '月兔', '广寒宫']
    '/book/author/nick/text()',
    # "//" selects descendants at any depth: every <nick> below <author>,
    # including the ones nested in <dv> and <span>
    '/book/author//nick/text()',
    # "*" matches any single element: only the <nick> elements exactly one
    # level below <author>, i.e. those inside <dv> and <span>
    '/book/author/*/nick/text()',
    # every <nick> anywhere below <book>
    '/book//nick/text()',
)

result1, result2, result3, result4, result5 = map(tree.xpath, _nick_queries)

# Show the matches for each query, in the order the queries are listed.
for _texts in (result1, result2, result3, result4, result5):
    print(_texts)