#!/usr/bin/env python3
#-*-coding:utf-8-*-
import re
from urllib import request
from gevent import monkey
import os
# Replace the standard socket module with gevent's cooperative version so
# that socket operations can yield to other greenlets.
# NOTE(review): only `socket` is patched here; HTTPS traffic also goes through
# `ssl` -- confirm whether monkey.patch_all() is needed for real concurrency.
monkey.patch_socket()
import gevent
import requests
# 1. Fetch the wallpaper listing page and save it to wallpaper.html.
def digestPage():
    """Fetch the random-wallpaper page and store its raw bytes on disk.

    The response body is written unmodified to ``wallpaper.html`` in the
    current working directory.

    Returns:
        bool: True when the page was fetched with HTTP 200 and saved;
        False when the request failed or returned any other status.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    }
    url = "https://wallhalla.com/random"
    try:
        # Without a timeout an unresponsive server would hang the script.
        webPage = requests.get(url, headers=headers, timeout=30)
    except requests.RequestException:
        # Network-level failures are reported the same way as a bad status,
        # so the caller only ever has to check the boolean result.
        print('无法抓取信息')
        return False
    if webPage.status_code != 200:
        print('无法抓取信息')
        return False
    # Binary mode stores the payload exactly as received, with no re-encoding.
    with open('wallpaper.html', 'wb') as wall_file:
        wall_file.write(webPage.content)
    return True
# 2. Coroutine body: download a single image.
def download(url,file_name):
    """Download *url* and save the response body to *file_name*.

    Args:
        url: Address of the image to fetch.
        file_name: Path of the local file the image bytes are written to.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with a 4xx/5xx status. Nothing is written to
            disk in that case.
    """
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    }
    print('图片%s下载等待'%file_name)
    # Without a timeout a stalled connection would block this task forever.
    response = requests.get(url, headers=headers, timeout=30)
    # Refuse to save an HTML error page under an image file name.
    response.raise_for_status()
    # Binary mode is required so the image bytes are stored unmodified.
    with open(file_name, 'wb') as f:
        f.write(response.content)
    print('图片%s下载完毕'%file_name)
def locateAndDownload():
    """Extract image links from wallpaper.html and download them concurrently.

    Reads ``wallpaper.html`` from the current working directory, collects the
    ``src`` value of every ``<img>`` tag, resolves each one against the site
    root and spawns one gevent task per image. Images are saved as
    ``0.jpg``, ``1.jpg``, ... in the order the links appear in the page.

    Raises:
        FileNotFoundError: If ``wallpaper.html`` does not exist.
    """
    # Local import keeps this function self-contained; urljoin builds URLs
    # correctly on every platform, unlike os.path.join.
    from urllib.parse import urljoin

    with open('wallpaper.html', 'r', encoding='utf-8') as file:
        data = file.read()
    # Raw string avoids invalid-escape warnings. The tag-bounded, non-greedy
    # match finds every <img> even when several share one line.
    pattern = re.compile(r'<img\b[^>]*?\ssrc\s*=\s*["\']([^"\'\s>]+)["\']')
    result = pattern.findall(data)  # image addresses as written in the page
    prefix = 'https://wallhalla.com/'
    # Spawn every task first and join once afterwards; joining inside the
    # loop would wait for each download before starting the next one.
    tasks = [
        gevent.spawn(download, urljoin(prefix, link), str(i) + '.jpg')
        for i, link in enumerate(result)
    ]
    gevent.joinall(tasks)
if __name__ == '__main__':
    # Script entry point: parse and download only after the page has been
    # fetched and saved successfully; otherwise tell the user and stop.
    if not digestPage():
        print("Sorry,我们无法帮您下载图片哦")
    else:
        locateAndDownload()
# Crawl a web page and download its images with coroutines.
# (Source page note: latest recommended article published 2022-06-27 07:50:54.)