import requests
from day02_spider.useragents import ua_list
import random
import os
import re
from urllib import parse
class BaiduImageSpider:
    """Download the thumbnail images Baidu image search returns for a keyword.

    The spider fetches the search result page for a keyword, extracts every
    ``"thumbURL"`` value found in the page text and saves each one as
    ``<save_root>/<word>/<word>_<n>.jpg``, where ``n`` is a counter that
    keeps increasing for the lifetime of the instance.
    """

    # Matches the thumbnail links embedded in the search page text.
    # Compiled once at class level instead of on every get_image() call.
    _THUMB_URL_RE = re.compile(r'"thumbURL":"(.*?)"', re.S)

    def __init__(self, save_root="D:/Python_code/python_train/images/",
                 timeout=10):
        """Create a spider.

        Args:
            save_root: Directory under which one sub-directory per keyword
                is created. Defaults to the path the script originally
                hard-coded.
            timeout: Seconds to wait for each HTTP request before giving up.
        """
        self.url = 'https://image.baidu.com/search/index?tn=baiduimage&word={}'
        self.save_root = save_root
        self.timeout = timeout
        # Running counter used to number the saved files.
        self.i = 1

    def _directory_for(self, word):
        """Return the directory (with a trailing '/') for ``word``'s images."""
        return os.path.join(self.save_root, word) + "/"

    def get_image(self, url, word):
        """Fetch the search page at ``url`` and download every thumbnail.

        Args:
            url: Fully formatted search URL (keyword already URL-encoded).
            word: The raw keyword; used for the directory and file names.

        Raises:
            requests.RequestException: If the search page cannot be fetched
                or the server answers with an HTTP error status.
        """
        headers = {"User-Agent": random.choice(ua_list)}
        # A timeout keeps a stalled connection from hanging the crawler.
        response = requests.get(url=url, headers=headers, timeout=self.timeout)
        response.raise_for_status()
        img_link_list = self._THUMB_URL_RE.findall(response.text)

        directory = self._directory_for(word)
        # exist_ok avoids the check-then-create race of os.path.exists().
        os.makedirs(directory, exist_ok=True)

        # Download the images one by one.
        for img_link in img_link_list:
            self.save_image(img_link, directory, word)

    def save_image(self, img_link, directory, word):
        """Download one image and write it to ``directory``.

        A failed download is reported and skipped, so a single bad link does
        not abort the remaining downloads; the counter only advances when a
        file was actually written.

        Args:
            img_link: URL of the image to download.
            directory: Target directory; it must already exist.
            word: Keyword used as the file name prefix.
        """
        headers = {"User-Agent": random.choice(ua_list)}
        try:
            response = requests.get(
                url=img_link, headers=headers, timeout=self.timeout
            )
            # Never store an HTTP error page as if it were a .jpg file.
            response.raise_for_status()
        except requests.RequestException as err:
            print(img_link, "下载失败:", err)
            return

        # os.path.join also copes with a directory lacking a trailing slash.
        filename = os.path.join(directory, '{}_{}.jpg'.format(word, self.i))
        with open(filename, "wb") as f:
            f.write(response.content)
        self.i += 1
        print(filename, "下载成功!")

    def run(self):
        """Entry point: prompt for a keyword and download its images."""
        word = input("请输入你要保存的图片:")
        # The keyword must be percent-encoded before it goes into the URL.
        encoded_word = parse.quote(word)
        url = self.url.format(encoded_word)
        self.get_image(url, word)
# Script entry point: start an interactive crawl only when this file is
# executed directly, not when it is imported as a module.
if __name__ == '__main__':
    BaiduImageSpider().run()
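# 爬取百度图片 (Crawling Baidu images)
# 最新推荐文章于 2024-05-27 13:31:48 发布 (web-page footer: latest recommended article published 2024-05-27 13:31:48)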