selenium爬取猫咪图片并转换为统一尺寸(内附已爬取并转换尺寸的图片下载链接)
朋友做机器学习需要几百到一千张左右的照片,所以简单写了个基于selenium的自动化爬虫,爬取猫咪图片并统一转换尺寸。
下面附有已经爬取好的5种猫咪的图片,每个种类五百多张。
爬虫代码
import requests
from lxml import etree
import time
from selenium.webdriver import Chrome
import os
# 1. Crawl Bing image search results for every cat breed.
# NOTE(review): the original indentation was lost in extraction; this version
# assumes the page is scrolled 12 times first and the HTML is parsed once
# afterwards, so each thumbnail is downloaded only once.
maomi=['德文','布偶','缅因','蓝猫','加菲','暹罗']
for page in maomi:
    web = Chrome()
    try:
        n = 1  # running index used in the saved file names
        web.get('https://cn.bing.com/images/search?q={}&qs=n&form=QBIR&sp=-1&pq=jia%27fei%27m&sc=8-9&cvid=36F2A5142F944DF1921F45FB416B9E46&first=1&tsc=ImageBasicHover'.format(page+"猫"))
        # 2. Scroll the page so more thumbnails get loaded, then parse it.
        time.sleep(2)
        js="var q=document.documentElement.scrollTop=10000"
        web.maximize_window()
        time.sleep(2)
        for _ in range(12):
            web.execute_script(js)
            time.sleep(2)  # give the lazily loaded results time to appear
        lst = etree.HTML(web.page_source)
        # 3. Collect the thumbnail URLs.
        src = lst.xpath("//div[@class='img_cont hoff']/img/@src")

        # Create the output folder (including the parent "猫咪" folder) once
        # per breed instead of retrying a failing mkdir for every image.
        out_dir = os.path.join("猫咪", page)
        if not os.path.isdir(out_dir):
            os.makedirs(out_dir)
            print("创建成功!")

        for img_page in src:
            try:
                # Download first, so a failed request does not leave an empty
                # .jpg file behind on disk.
                resp = requests.get(img_page, timeout=10)
                resp.raise_for_status()
                with open('猫咪/{}/壁纸{}.jpg'.format(page,n),'wb') as f:
                    f.write(resp.content)
                print('下载壁纸第{}张完成!'.format(n))
                n += 1
            except (requests.RequestException, OSError) as e:
                # Best effort: report the failure and continue with the next image.
                print(e)
                print("网页访问失败!")
    finally:
        # Always close the browser, even if scraping this breed failed.
        web.quit()
尺寸转换代码
import os
from PIL import Image
import glob
def convertImgSize(filename, outdir, width=128, height=128):
    """Resize one image and save it under the same file name in *outdir*.

    Args:
        filename: Path of the source image.
        outdir: Directory that receives the resized copy; created if missing.
        width: Target width in pixels.
        height: Target height in pixels.

    Any error (unreadable file, failed save, ...) is printed and swallowed so
    that one bad file does not abort a batch run.
    """
    # Pixel modes Pillow's JPEG writer accepts; other modes (e.g. RGBA or P
    # from PNG/GIF content saved under a .jpg name) must be converted first.
    jpeg_modes = ("1", "L", "RGB", "RGBX", "CMYK", "YCbCr")
    try:
        os.makedirs(outdir, exist_ok=True)
        # Open inside the try block and via a context manager so that broken
        # files are reported like any other failure and the handle is closed.
        with Image.open(filename) as img:
            new = img.resize((width, height), Image.BILINEAR)
        p = os.path.basename(filename)
        print(p)
        if p.lower().endswith((".jpg", ".jpeg")) and new.mode not in jpeg_modes:
            new = new.convert("RGB")
        new.save(os.path.join(outdir, p))
    except Exception as e:
        print(e)
if __name__ == '__main__':
    # Find the .jpg files in every breed folder and write resized copies to a
    # sibling folder whose name is the breed name followed by "1".
    maomi = ['德文','布偶','缅因','蓝猫','加菲','暹罗']
    for breed in maomi:
        outdir = 'D:/pythonProject1/testurllib/猫咪/{}1'.format(breed)
        filenames = glob.glob(r'D:/pythonProject1/testurllib/猫咪/{}/*.jpg'.format(breed))
        if filenames:
            # The output folder is not created anywhere else; without it
            # every save would fail.
            os.makedirs(outdir, exist_ok=True)
        for filename in filenames:
            print(filename)
            convertImgSize(filename, outdir)
有需要的朋友记得修改代码中的路径(爬虫保存图片的目录,以及尺寸转换代码中的 D:/pythonProject1/testurllib/ 路径)。
下载链接
点进去后,下载 GitHub 仓库中的“图片.zip”。