1:选择所有的元素
from lxml import etree
# Build a minimal tree: <root><child/><child/></root>.
root = etree.Element("root")
child1 = etree.SubElement(root, "child")
child2 = etree.SubElement(root, "child")

# "//child" matches every <child> element anywhere in the document.
children = root.xpath("//child")
for child in children:
    # The loop body must be indented, otherwise Python raises IndentationError.
    # etree.tostring() returns bytes by default, so this prints a bytes literal.
    print(etree.tostring(child))
2:选择具有特定属性的元素
from lxml import etree
# Build a tree with two <child> elements that differ only in their "key" attribute.
root = etree.Element("root")
child1 = etree.SubElement(root, "child", attrib={"key": "value1"})
child2 = etree.SubElement(root, "child", attrib={"key": "value2"})

# The predicate [@key="value1"] keeps only <child> elements whose key attribute
# equals "value1".
children_with_value1 = root.xpath('//child[@key="value1"]')
for child in children_with_value1:
    # The loop body must be indented, otherwise Python raises IndentationError.
    print(etree.tostring(child))
3:选择具有特定子元素的元素
from lxml import etree
# Build a tree in which only the first <child> contains a <grandchild>.
root = etree.Element("root")
child1 = etree.SubElement(root, "child")
grandchild = etree.SubElement(child1, "grandchild")
child2 = etree.SubElement(root, "child")

# The predicate [grandchild] keeps only <child> elements that have at least one
# <grandchild> as a direct sub-element.
children_with_grandchild = root.xpath('//child[grandchild]')
for child in children_with_grandchild:
    # The loop body must be indented, otherwise Python raises IndentationError.
    print(etree.tostring(child))
4:选择特定位置的元素
from lxml import etree
# Build <root> with three sibling <child> elements, created in document order.
root = etree.Element("root")
child1, child2, child3 = (etree.SubElement(root, "child") for _ in range(3))

# XPath positions are 1-based: child[1] is the first <child> under its parent.
matches = root.xpath('//child[1]')
first_child = matches[0]
print(etree.tostring(first_child))
5:获取元素的文本内容
from lxml import etree
# Build <root><child>Some text content</child></root>.
root = etree.Element("root")
child = etree.SubElement(root, "child")
child.text = "Some text content"

# text() selects the text nodes of the matched <child> elements; xpath()
# returns them as a list, so take the first entry.
text_nodes = root.xpath('//child/text()')
text_content = text_nodes[0]
print(text_content)  # prints: Some text content
6:获取元素的属性值
from lxml import etree
# Build <root><child key="value"/></root>.
root = etree.Element("root")
child_attributes = {"key": "value"}
child = etree.SubElement(root, "child", attrib=child_attributes)

# @key selects the "key" attribute of the matched <child> elements; xpath()
# returns the attribute values as a list, so take the first entry.
key_values = root.xpath('//child/@key')
attribute_value = key_values[0]
print(attribute_value)  # prints: value
* 在使用 XPath 时,以单斜杠 / 开头的路径是绝对路径,从文档根节点开始逐级向下选择;路径中间的 / 只匹配上一步结果的直接子节点。
双斜杠 // 匹配任意深度的后代节点,因此以 // 开头的表达式会在整个文档中查找匹配的节点,而不考虑它们在文档树中所处的层级。