使用 etree.HTML() 解析网页时报错：ValueError: can only parse strings
from lxml import etree
# Reproduces the error: open() returns a file object, not the file's contents.
html = open("joblist.html")
print(html)
# etree.HTML() is handed the file object rather than a string, which is what
# triggers "ValueError: can only parse strings".
et = etree.HTML(html,parser=etree.HTMLParser(encoding='utf-8'))
原因：open() 返回的是文件对象而不是字符串；在 open() 后加上 .read() 读取文件内容，即可解决问题。
from lxml import etree
# Read the file's text first: etree.HTML() parses a string, not a file object.
# The context manager closes the file handle, and the explicit encoding keeps
# decoding independent of the platform default (the parser below is also
# configured for UTF-8).
with open("joblist.html", encoding="utf-8") as f:
    html = f.read()
print(html)
et = etree.HTML(html, parser=etree.HTMLParser(encoding='utf-8'))