from lxml import etree
"""
1.读取html字符串
"""
# Section 1: build an element tree from an HTML string held in memory.
# Note that the last two <td> tags in the fragment are left unclosed.
text = """
<tr class="hots">
<td class="1">hot1</td>
<td class="2">hot2</td>
<td class="3">hot3</td>
<td class="4">hot4</td>
<td class="5">hot5
<td class="6">爬虫
</tr>
"""
# etree.HTML() parses the string and hands back the root of the tree.
html = etree.HTML(text)
print(type(html))
# Serialize the tree to UTF-8 bytes, then turn those bytes back into text
# so the printed output is readable.
result = str(etree.tostring(html, encoding='utf8'), 'utf8')
print(result)
"""
2.直接解析html文件
【默认使用xml解析器】
"""
# Section 2: parse an HTML file from disk.
# An HTMLParser is supplied explicitly so that etree.parse() does not fall
# back to its default XML parser.
parser = etree.HTMLParser(encoding='utf8')
html = etree.parse(
    r"/Users/dx/Desktop/(凡博)Python爬虫资料/3.Python爬虫数据提取-Xpath语法/课程资料/test.html",
    parser=parser,
)
# Serialize the parsed document to UTF-8 bytes and decode them for printing.
result = str(etree.tostring(html, encoding='utf8'), 'utf8')
print(result)
text = \
"""
<ul class="ullist" padding="1" spacing="1">
<li>
<div id="top">
<span class="position" width="350">职位名称</span>