XPath语法参考:https://blog.csdn.net/houyanhua1/article/details/86484770
demo.py(lxml模块,XPath语法提取页面数据):
# coding=utf-8
from lxml import etree
# Simulated HTML page: a <div> wrapping a <ul> of five <li> items, each holding
# one <a> link. It serves as the sample input for this demo.
# Two irregularities are visible in the markup below:
#   * the first <a> has no href attribute;
#   * the last <li> (class "item-0") is never closed with </li>.
# NOTE(review): the unclosed <li> is presumably deliberate (to show the HTML
# parser repairing broken markup) -- confirm before "fixing" the sample.
text = ''' <div> <ul>
<li class="item-1"><a>first item</a></li>
<li class="item-1"><a href="link2.html">second item</a></li>
<li class="item-inactive"><a href="link3.html">third item</a></li>
<li class="item-1"><a href="link4.html">fourth item</a></li>
<li class="item-0"><a href="link5.html">fifth item</a>
</ul> </div> ''' # end of the sample markup