# Python爬虫实战-爬取糗图图片并保存至本地文件夹(正则)
import urllib.request
import urllib.parse
import re
import os
import time
def handle_request(url, page):
    """Build the HTTP request object for a single listing page.

    The page number is appended to ``url`` followed by a trailing slash,
    and the request is given a desktop-browser ``User-Agent`` header.

    Args:
        url: Base listing URL that the page number is appended to.
        page: Page number; converted with ``str()`` before being appended.

    Returns:
        A ``urllib.request.Request`` for the composed page URL. Nothing is
        sent over the network here.
    """
    page_url = ''.join((url, str(page), '/'))
    browser_headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36',
    }
    return urllib.request.Request(page_url, headers=browser_headers)
def download_image(content, *, dirname='qiutu', delay=1):
    """Download every thumbnail image referenced in a page of HTML.

    Image URLs are taken from the ``src`` attribute of the ``<img>`` tag
    inside each ``<div class="thumb">`` element. Each image is saved into
    ``dirname`` under the last path segment of its URL.

    Args:
        content: HTML text of one listing page.
        dirname: Directory the images are written to. It is created (with
            any missing parents) only when at least one image was found.
        delay: Seconds to sleep after each download.

    Returns:
        None.

    Raises:
        urllib.error.URLError: Propagated from ``urlretrieve`` when an
            image cannot be fetched.
        OSError: If the directory or a target file cannot be written.
    """
    pattern = re.compile(r'<div class="thumb">.*?<img src="(.*?)" alt=".*?">.*?</div>', re.S)
    image_srcs = pattern.findall(content)
    if not image_srcs:
        # Nothing to download: leave the filesystem untouched.
        return

    # Create the target directory once, without an exists()/mkdir() race.
    os.makedirs(dirname, exist_ok=True)

    for image_src in image_srcs:
        # Only protocol-relative URLs ("//host/path") need a scheme added;
        # a src that already has one must be left alone.
        if image_src.startswith('//'):
            image_src = 'https:' + image_src
        # Name the file after the URL *path* so that a query string or
        # fragment does not end up in the filename.
        filename = urllib.parse.urlsplit(image_src).path.rsplit('/', 1)[-1]
        filepath = os.path.join(dirname, filename)
        print('%s图片正在下载......' % filename)
        urllib.request.urlretrieve(image_src, filepath)
        # Pause between downloads to avoid hammering the server.
        time.sleep(delay)
def main():
    """Interactively download the images from a range of listing pages.

    Prompts for a start and an end page number (both inclusive). For each
    page it builds the request with ``handle_request``, fetches and decodes
    the HTML, passes it to ``download_image``, and then sleeps one second
    before moving on to the next page.

    Raises:
        ValueError: If either prompt is answered with a non-integer.
        urllib.error.URLError: If a listing page cannot be fetched.
    """
    url = "https://www.qiushibaike.com/pic/page/"
    start_page = int(input('请输入开始页码:'))
    end_page = int(input('请输入结束页码:'))
    for page in range(start_page, end_page + 1):
        request = handle_request(url, page)
        # Use the response as a context manager so the connection is
        # closed as soon as the body has been read, instead of relying on
        # garbage collection.
        with urllib.request.urlopen(request) as response:
            content = response.read().decode()
        print('第%s页开始下载......' % page)
        download_image(content)
        print('第%s页下载完毕......' % page)
        # Pause between pages to keep the request rate low.
        time.sleep(1)
# Run the interactive downloader only when executed as a script,
# not when this module is imported.
if __name__ == "__main__":
    main()