# CY3761 | 2021-11-19 12:22
import requests
from lxml import etree
from pyquery import PyQuery as pq
# Demo script: fetch one wallpaper detail page and extract data from it twice,
# first with pyquery (CSS selectors), then with lxml (XPath).

# Browser-like User-Agent header sent with the request.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/95.0.4638.69 Safari/537.36 '
}

# requests has no default timeout; without one the script can block forever
# if the server never responds.
reqs = requests.get('http://www.netbian.com/desk/23805-1920x1080.htm', headers=headers, timeout=10)
# requests falls back to ISO-8859-1 for text responses that declare no charset.
# In that case use the encoding detected from the body so non-ASCII text
# (e.g. the page title) is not garbled.
if (reqs.encoding or '').lower() == 'iso-8859-1':
    reqs.encoding = reqs.apparent_encoding
html = reqs.text
print(reqs.status_code)

# --- pyquery: CSS-selector based extraction ---
doc = pq(html)
print(doc('title').text())
imgs = doc('#endimg img')
print(len(imgs))
# Iterating a PyQuery selection yields raw lxml elements, so each one is
# wrapped in pq() again to use .attr().
for img in imgs:
    print(pq(img).attr('src'))
print('-' * 80)

# --- lxml: XPath based extraction ---
# When scraping data tables, note that the page source does not necessarily
# contain a <tbody> element.
e = etree.HTML(html)
print(e.xpath(r'//table[@id="endimg"]/@width'))  # width attribute of the table; xpath returns a list
print(e.xpath('//@src'))
# Result
# Personally, I am more used to pyquery.