from lxml import etree
# Read the .html / .htm file into memory.
# NOTE: open() without an explicit encoding uses the platform default;
# pass encoding=... if the file's encoding is known to differ.
with open(path) as f:
    # The read must be inside the `with` body so it runs while f is open.
    a = f.read()
# Parse the markup into an element tree.
selector = etree.HTML(a)
# Use XPath to get the text at an element's location.
# NOTE(review): the call is incomplete in the original snippet - as written
# this binds the `xpath` method itself and does not evaluate anything.
# Supply an expression, e.g. selector.xpath('//title/text()') - TODO confirm
# the intended XPath.
title = selector.xpath
HTML 页面信息抽取
最新推荐文章于 2022-11-15 12:59:29 发布
from lxml import etree
# Read the .html / .htm file into memory.
# NOTE: open() without an explicit encoding uses the platform default;
# pass encoding=... if the file's encoding is known to differ.
with open(path) as f:
    # The read must be inside the `with` body so it runs while f is open.
    a = f.read()
# Parse the markup into an element tree.
selector = etree.HTML(a)
# Use XPath to get the text at an element's location.
# NOTE(review): the call is incomplete in the original snippet - as written
# this binds the `xpath` method itself and does not evaluate anything.
# Supply an expression, e.g. selector.xpath('//title/text()') - TODO confirm
# the intended XPath.
title = selector.xpath