from lxml import etree
if __name__ == '__main__':
    # Sample HTML fragment used as parser input. Note that the last <li>
    # has no closing </li> tag; the literal is kept exactly as written.
    text = '''
<div>
<ul>
<li class="item-0"><a href="link1.html">first item</a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-inactive"><a href="link3.html">third item</a></li>
<li class="item-1"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a>
</ul>
</div>
'''
    # Convert the text into an HTML element tree.
    html = etree.HTML(text)
    # Serialize the tree back into HTML markup (a bytes object, hence the
    # decode below).
    result = etree.tostring(html)
    # Print the serialized markup. In the original this call was fused
    # into the preceding comment and therefore never executed.
    print(result.decode('utf-8'))
# XPath example: select elements from a parsed file
from lxml import etree
if __name__ == '__main__':
    # Parse the document stored in the file text.html.
    # NOTE(review): etree.parse is called without an explicit parser, so
    # lxml's default XML parser is used. If text.html is not well-formed
    # (e.g. has unclosed tags), passing etree.HTMLParser() may be needed
    # -- confirm against the actual file.
    html = etree.parse('text.html')
    # Select every <li> element whose class attribute equals "item-1".
    result = html.xpath('//li[@class="item-1"]')
    # Print the markup of each matching element on its own line.
    for item in result:
        print(etree.tostring(item).decode('utf-8'))