import requests
from lxml import etree
import os
import time
# Request headers sent with every page fetch; the User-Agent string is that
# of a desktop Chrome browser.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/112.0.0.0 Safari/537.36',
}

# Output directory, created relative to the current working directory.
# NOTE(review): presumably where downloaded images are saved -- the code that
# writes into it is not visible in this snippet.
dirName = 'ImgLib'

# exist_ok=True makes this a no-op when the directory is already present and
# avoids the check-then-create race of os.path.exists() + os.mkdir().
os.makedirs(dirName, exist_ok=True)
# Walk listing pages 1..10 of pic.netbian.com and derive a '.jpg' file name
# from the title text of every <li> entry found on each page.
for i in range(1, 11):
    print('>>>>>>>>>>' + '正在爬取第%d页的数据' % i)

    # Page 1 is the site root; later pages use the index_<n>.html pattern.
    if i == 1:
        url = 'http://pic.netbian.com/'
    else:
        url = 'http://pic.netbian.com/index_' + str(i) + '.html'

    # A timeout keeps a single stalled connection from hanging the whole run.
    response = requests.get(url, headers=headers, timeout=10)
    # Force GBK decoding of the body instead of relying on the encoding
    # requests would otherwise pick.
    response.encoding = 'gbk'
    page_text = response.text

    tree = etree.HTML(page_text)
    li_list = tree.xpath('//*[@id="main"]/div[3]/ul/li')
    for li in li_list:
        titles = li.xpath('./a/b/text()')
        if not titles:
            # Entry without a title node: skip it rather than fail with
            # IndexError on the [0] lookup.
            continue
        img_name = titles[0] + '.jpg'
        # NOTE(review): the original snippet is cut off at `time.sle`, so the
        # sleep duration below is an assumption and the step that would
        # download/save the image under img_name is missing -- restore both
        # from the full script.
        time.sleep(1)
基于xpath爬取小图片
于 2023-04-23 21:06:32 首次发布
![](https://img-home.csdnimg.cn/images/20240711042549.png)