直接上代码,有注释:
import urllib.request
import urllib.parse
import re
import os
def handle_request(url, page):
    """Build the HTTP request object for one listing page.

    Args:
        url: Base URL of the paginated listing; the page number is appended
            directly to it, so it is expected to end with a ``/``.
        page: Page number to request (anything ``str()`` can render).

    Returns:
        A ``urllib.request.Request`` for ``<url><page>/`` carrying a
        desktop Firefox ``User-Agent`` header.
    """
    page_url = f'{url}{page}/'
    # Send a browser-like User-Agent instead of urllib's default one.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:68.0) Gecko/20100101 Firefox/68.0',
    }
    return urllib.request.Request(url=page_url, headers=headers)
def download_image(content, dirname='qiutu'):
    """Download every thumbnail image referenced in a page's HTML.

    The HTML is scanned for ``<div class="thumb">`` blocks and the ``src``
    of the ``<img>`` inside each one is downloaded into ``dirname``. The
    number of matches is printed first, then a start/finish line per image.

    Args:
        content: Decoded HTML text of one listing page.
        dirname: Directory the images are saved into; created on demand.
            Defaults to ``'qiutu'``.

    Returns:
        None. Files are written to disk as a side effect.
    """
    pattern = re.compile(
        r'<div class="thumb">.*?<img src="(.*?)" .*?>.*?</div>', re.S)
    image_srcs = pattern.findall(content)
    print(len(image_srcs))
    if not image_srcs:
        # Nothing to download: do not create an empty directory.
        return

    # Create the target directory once, without an exists()/mkdir() race.
    os.makedirs(dirname, exist_ok=True)

    # Walk the matches and download each image in turn.
    for image_src in image_srcs:
        # The src values are scheme-less ('//host/path'), so prepend the
        # scheme -- but only when one is missing, so an already absolute
        # URL is not turned into 'https:https://...'.
        if urllib.parse.urlsplit(image_src).scheme:
            image_url = image_src
        else:
            image_url = 'https:' + image_src

        # Name the file after the last segment of the URL *path*, so a
        # query string or fragment never ends up in the file name.
        filename = os.path.basename(urllib.parse.urlsplit(image_url).path)
        filepath = os.path.join(dirname, filename)

        print('%s图片正在下载.........' % filename)
        urllib.request.urlretrieve(image_url, filepath)
        print('%s图片结束下载.........' % filename)
def main():
    """Prompt for a page range and download the images of every page in it.

    Reads the first and last page numbers from standard input, then for
    each page (both ends inclusive) builds the request, fetches and decodes
    the HTML, and hands it to ``download_image``.

    Raises:
        ValueError: If either entered page number is not an integer.
    """
    url = 'https://www.qiushibaike.com/pic/page/'
    start_page = int(input('请输入起始页码:'))
    end_page = int(input('请输入结束页码:'))
    for page in range(start_page, end_page + 1):
        print('第%s页开始下载....' % page)
        # Build the request object for this page.
        request = handle_request(url, page)
        # Send it and read the body; the with-block closes the connection
        # even if reading or decoding fails.
        with urllib.request.urlopen(request) as response:
            content = response.read().decode()
        # Parse the HTML and download every image it links to.
        download_image(content)
        print('第%s页结束下载....' % page)
        print()
        print()
# Run the downloader only when executed as a script, not when imported.
if __name__ == '__main__':
    main()
下图是结果:
更多学习资料,欢迎关注微信公众号“源起1024”
或扫描下方二维码关注: