跟着B站大神的课程做出来的,但是好多地方还不明白,头疼啊!
# -*- coding: utf-8 -*-
# @File : 糗图2.py
# @Author : 刘航宇
# @Time : 2021/07/20 19:48:07
import urllib
import requests
import re
import os
import time
def handle_request(url, page, timeout=10):
    """Fetch one listing page and return the HTTP response.

    Args:
        url: Base URL of the listing; the page number is appended to it.
        page: Page number to fetch. It is converted with ``str`` and
            followed by a trailing slash.
        timeout: Seconds to wait for the server before giving up. Without
            a timeout ``requests`` waits indefinitely on a stalled
            connection.

    Returns:
        The ``requests.Response`` produced by the GET request.

    Raises:
        requests.exceptions.Timeout: If the server does not answer within
            ``timeout`` seconds.
    """
    page_url = url + str(page) + '/'
    # Send a desktop-browser User-Agent instead of the library default.
    headers = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"}
    return requests.get(url=page_url, headers=headers, timeout=timeout)
def download_image(content, dirname='qiutu', delay=1):
    """Download every thumbnail image referenced in a page of HTML.

    The page is scanned for ``<div class="thumb">`` blocks and the ``src``
    of the first ``<img>`` inside each one is downloaded into ``dirname``.

    Args:
        content: HTML text of one listing page.
        dirname: Directory the images are saved to. It is created only
            when the page contains at least one thumbnail.
        delay: Seconds to sleep after each download.

    Returns:
        None.

    Raises:
        urllib.error.URLError: If a download fails (raised by
            ``urlretrieve``).
    """
    # A bare ``import urllib`` does not guarantee that the submodules are
    # loaded, so import the ones used here explicitly.
    import urllib.parse
    import urllib.request

    pattern = re.compile(
        r'<div class="thumb">.*?<img src="(.*?)".*?</div>', re.S)
    sources = pattern.findall(content)
    if not sources:
        return

    # Create the target directory once, not on every loop iteration.
    os.makedirs(dirname, exist_ok=True)

    for image_src in sources:
        # Resolve protocol-relative ("//host/path") and relative sources to
        # an absolute URL; sources that are already absolute stay unchanged.
        image_url = urllib.parse.urljoin(
            'https://www.qiushibaike.com/', image_src)
        parts = urllib.parse.urlsplit(image_url)
        # Take the name from the URL path only, so a "?query" suffix does
        # not end up in the file name.
        filename = parts.path.rsplit('/', 1)[-1]
        if parts.scheme not in ('http', 'https') or not filename:
            print('%s不是可下载的图片地址,已跳过' % image_src)
            continue
        filepath = os.path.join(dirname, filename)
        print('%s图片正在下载.....' % filename)
        urllib.request.urlretrieve(image_url, filepath)
        print('%s图片结束下载.....' % filename)
        # Pause between downloads.
        time.sleep(delay)
def main():
    """Prompt for a page range and download the images of each page.

    Reads the first and last page numbers from standard input, then for
    every page in that inclusive range fetches the listing page, decodes
    it as UTF-8 and passes the HTML to ``download_image``. Sleeps two
    seconds after each page.
    """
    base_url = 'https://www.qiushibaike.com/imgrank/page/'
    first_page = int(input("请输入起始页码:"))
    last_page = int(input("请输入结束页码:"))

    page = first_page
    while page <= last_page:
        print('第%s页开始下载...' % page)
        # Send the request for this page and keep the response.
        response = handle_request(base_url, page)
        # Decode the raw response bytes as UTF-8 text.
        html = response.content.decode('utf-8')
        # Extract the image links from the HTML and download them.
        download_image(html)
        print('第%s页下载完毕' % page)
        print()
        print()
        time.sleep(2)
        page += 1
# Start the downloader only when this file is run as a script, not when
# it is imported as a module.
if __name__ == "__main__":
    main()
编辑器:VS Code