一、lxml 的安装
pip install lxml
二、使用 lxml 构建节点
from lxml import etree
if __name__ == '__main__':
    # Element creates a node used to build the XML document structure.
    root = etree.Element("root")
    # Inspect the type of the node object.
    print(type(root))
    # Serialize the Element to a byte string and print it.
    print(etree.tostring(root))
lxml 的绝大多数功能都集中在 etree 子模块中,使用前需要先导入该子模块(from lxml import etree)。
三、添加 Element 节点中的属性和内容
from lxml import etree
if __name__ == '__main__':
    # Element creates a node used to build the XML document structure.
    root = etree.Element("root")
    # Inspect the type of the node object.
    print(type(root))
    # Serialize the Element to a byte string and print it.
    print(etree.tostring(root))
    # Set the text content of the node.
    root.text = "abc"
    print(etree.tostring(root))
    # Add an attribute to the node.
    root.set("class", "active")
    print(etree.tostring(root))
执行结果
<class 'lxml.etree._Element'>
b'<root/>'
b'<root>abc</root>'
b'<root class="active">abc</root>'
四、使用 lxml 构建带有属性的节点
from lxml import etree
if __name__ == '__main__':
    # The first argument of the Element constructor is the tag name; the
    # second is a dict holding the attributes to set on the new node.
    child1 = etree.Element("child1", {"style": "font-size:18px"})
    print(etree.tostring(child1))
    child1 = etree.Element("child1", {"style": "font-size:18px", "class": "deactive"})
    print(etree.tostring(child1))
五、修改标签名称
from lxml import etree

if __name__ == '__main__':
    child1 = etree.Element("child1", {"style": "font-size:18px", "class": "deactive"})
    print(etree.tostring(child1))
    # Assigning to .tag renames the element; its attributes are kept.
    child1.tag = "child"
    print(etree.tostring(child1))
执行结果:
b'<child1 style="font-size:18px" class="deactive"/>'
b'<child style="font-size:18px" class="deactive"/>'
六、元素和标签的区别
个人理解:在很多场合,这两个词被当作同义词使用。但严格来说两者并不相同:元素 = 标签 + 属性(以及开始标签与结束标签之间的内容),而标签只是标记元素起止的那一部分。
七、向节点中插入子节点
# 向节点中插入子节点
from lxml import etree
if __name__ == '__main__':
    tree = etree.Element("root")
    # Option 1: create a node first, then attach it with append().
    child1 = etree.Element("child1")
    tree.append(child1)
    print(etree.tostring(tree))
    # Option 2: create and attach the child in one step with etree.SubElement.
    etree.SubElement(tree, "child2")
    print(etree.tostring(tree))
执行结果
b'<root><child1/></root>'
b'<root><child1/><child2/></root>'
八、练习1
利用lxml 库相关的函数构造如下形式的元素
<root>
<child1/>
<child2/>
<child3/>
</root>
from lxml.etree import Element
from lxml.etree import tostring
from lxml.etree import SubElement
if __name__ == '__main__':
    # Solution 1: build each child separately, then append them to the root
    # in document order.
    root = Element("root")
    child1 = Element("child1")
    child2 = Element("child2")
    child3 = Element("child3")
    root.append(child1)
    root.append(child2)
    root.append(child3)
    print(tostring(root))
from lxml.etree import Element
from lxml.etree import tostring
from lxml.etree import SubElement
if __name__ == '__main__':
    # Solution 2: let SubElement create each child and attach it to the root
    # in a single call.
    root = Element("root")
    SubElement(root, "child1")
    SubElement(root, "child2")
    SubElement(root, "child3")
    print(tostring(root))
九、练习2
利用lxml 库相关的函数构造如下形式的元素
<root>
<child1 class="a">a</child1>
<child2 class="b">b</child2>
<child3 class="c">c</child3>
</root>
from lxml.etree import Element
from lxml.etree import tostring
from lxml.etree import SubElement
if __name__ == '__main__':
    # Solution 1: pass the attributes as a dict to the Element constructor,
    # then set the text content on each child.
    root = Element("root")
    child1 = Element("child1", {"class": "a"})
    child1.text = "a"
    child2 = Element("child2", {"class": "b"})
    child2.text = "b"
    child3 = Element("child3", {"class": "c"})
    child3.text = "c"
    root.append(child1)
    root.append(child2)
    root.append(child3)
    print(tostring(root))
from lxml.etree import Element
from lxml.etree import tostring
from lxml.etree import SubElement
if __name__ == '__main__':
    # Solution 2: create bare elements, then fill in the text content and
    # the "class" attribute with set() afterwards.
    root = Element("root")
    child1 = Element("child1")
    child1.text = "a"
    child1.set("class", "a")
    child2 = Element("child2")
    child2.text = "b"
    child2.set("class", "b")
    child3 = Element("child3")
    child3.text = "c"
    child3.set("class", "c")
    root.append(child1)
    root.append(child2)
    root.append(child3)
    print(tostring(root))