import requests, os, time
import aiohttp, asyncio
import hashlib


class Spider(object):
    def __init__(self):
        # NOTE: fill in a real User-Agent string here; many sites reject empty ones.
        self.headers = {'User-Agent': ''}
        self.num = 1
        if 'pro' not in os.listdir('.'):
            os.mkdir('pro')
        self.path = os.path.join(os.path.abspath('.'), 'pro')
        os.chdir(self.path)  # switch into the download directory

    async def __get_content(self, link):  # `link` is the URL of one image
        async with aiohttp.ClientSession() as session:
            response = await session.get(link)
            content = await response.read()
            return content

    def __get_img_links(self, page):  # collect the image links on one page
        # Use requests here to fetch the page and return all image URLs as a list.
        return []  # placeholder: replace with the real scraping logic (see sketch below)
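
    # A minimal sketch of how __get_img_links could be implemented, assuming the
    # target site serves plain HTML pages at a hypothetical URL pattern
    # (PAGE_URL_TEMPLATE below is a placeholder, not a real endpoint). It fetches
    # the page with requests and pulls <img src="..."> values out with a regex;
    # swap in whatever parsing the actual site needs.
    def __get_img_links_example(self, page):
        import re  # local import so the sketch stays self-contained
        PAGE_URL_TEMPLATE = 'https://example.com/page/%d'  # hypothetical URL pattern
        html = requests.get(PAGE_URL_TEMPLATE % page, headers=self.headers).text
        # Collect the src attribute of every <img> tag on the page.
        return re.findall(r'<img[^>]+src="([^"]+)"', html)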

    def __get_md5_path_name(self, url_name):
        # Hash the image URL so its hex digest can serve as the file name.
        cre_md5 = hashlib.md5()
        utf_url_name = url_name.encode(encoding='utf-8')
        cre_md5.update(utf_url_name)
        return cre_md5.hexdigest()

    async def __download_img(self, img):
        content = await self.__get_content(img)  # raw bytes of the image
        file_name = self.__get_md5_path_name(img)  # MD5-based file name
        with open(file_name + '.jpg', 'wb') as f:
            f.write(content)
        print('Downloaded image #%s successfully' % self.num)
        self.num += 1

    def run(self):
        start = time.time()
        loop = asyncio.get_event_loop()  # reuse one event loop for every page
        for x in range(1, 101):  # download 100 pages, or adjust the page count as needed
            links = self.__get_img_links(x)  # image links found on this page
            if not links:  # nothing to download on this page
                continue
            tasks = [asyncio.ensure_future(self.__download_img(link)) for link in links]
            loop.run_until_complete(asyncio.wait(tasks))
            if self.num >= 10:  # for speed testing only; comment this out to download more images
                break
        end = time.time()
        print('Total running time: %s seconds' % (end - start))


def main():
    spider = Spider()
    spider.run()


if __name__ == '__main__':
    main()