话不多说,直接放代码:
# -*- coding: utf-8 -*-
import os
import urllib
import urllib.parse
import urllib.request

import requests
from lxml import html

etree=html.etree
# Download every thumbnail listed on a nipic.com topic page into ../img/,
# naming each file after the title text found in the same list item.
url = 'https://www.nipic.com/topic/show_27434_1.html'  # nipic.com page used as a small test target
header = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
save_dir = '../img/'
# Characters removed from a title before it is used as a file name: spaces
# (as the original script did) plus path separators and characters that are
# invalid in file names on common filesystems.
strip_chars = str.maketrans('', '', ' \\/:*?"<>|')

# A timeout keeps the script from hanging forever on a stalled connection;
# raise_for_status() stops early instead of parsing an error page.
resp = requests.get(url, headers=header, timeout=10)
resp.raise_for_status()
text = etree.HTML(resp.text)  # parse the page into an element tree

# urlretrieve does not create missing directories.
os.makedirs(save_dir, exist_ok=True)

# Walk the list items one by one so each title is paired with the image from
# the SAME item; zipping two independently collected lists would shift every
# following pair if one item had an extra or missing link text.
items = text.xpath('.//li[@class="new-search-works-item"]') if text is not None else []
for item in items:
    # Drop whitespace-only text nodes (blank lines between tags).
    names = [t.strip() for t in item.xpath('.//a/text()') if t.strip()]
    sources = item.xpath('.//img/@src')
    if not names or not sources:
        continue
    imgname = names[0].translate(strip_chars)
    if not imgname:
        continue
    # urljoin turns a scheme-relative "//host/path" src into "https://host/path"
    # and leaves an already absolute URL untouched.
    img_link = urllib.parse.urljoin(url, sources[0])
    urllib.request.urlretrieve(img_link, save_dir + imgname + '.jpg')
print('over')
python爬取图片
最新推荐文章于 2024-04-16 22:07:34 发布