import urllib.request
import urllib.parse
import urllib.error
import re
import os
import ssl
# NOTE(review): this replaces urllib's default HTTPS context factory with the
# unverified one, so TLS certificates are NOT checked for any HTTPS request
# made through urllib in this process. Kept to preserve existing behaviour;
# remove it if the target site presents a valid certificate.
ssl._create_default_https_context = ssl._create_unverified_context

# Root directory under which all downloaded images are stored.
path = "./images"

# HTTP headers sent with every request issued by this script.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
        "(KHTML, like Gecko) Chrome/75.0.3770.100 Safari/537.36"
    ),
    "referer": "https://www.mzitu.com/xinggan/",
}
def handler_request(url, pageIndex):
    """Build the request object for one listing page.

    Args:
        url: Base listing URL. The page number is appended to it directly,
            without any separator being inserted.
        pageIndex: Page number appended to ``url``.

    Returns:
        A ``urllib.request.Request`` for ``url + str(pageIndex)`` that carries
        the module-level ``headers``.
    """
    url = url + str(pageIndex)
    # Build the request object.
    request = urllib.request.Request(url=url, headers=headers)
    return request
# NOTE(review): the regular expressions of the original function were
# truncated in the source (their HTML tag text was lost), so this pattern is a
# reconstruction. It assumes each album is listed as
# ``<li><a href="..."><img ... alt="..."``; confirm against the live page.
# Both captures keep their surrounding quote characters.
_ALBUM_PATTERN = re.compile(
    r"""<li>\s*<a\b[^>]*?\bhref=("[^"]*"|'[^']*')[^>]*>\s*"""
    r"""<img\b[^>]*?\balt=("[^"]*"|'[^']*')"""
)


def get_images_url(content, basePath):
    """Find the albums on a listing page and download each of them.

    The link and the name of an album are captured by a single match, so they
    cannot get out of step with each other. When two albums share a name, the
    one appearing later on the page wins.

    Args:
        content: Decoded HTML of a listing page.
        basePath: Directory handed on to ``image_category_response`` as the
            parent of the per-album folders.
    """
    # The name is stored without its quotes; the link keeps them because
    # image_category_response strips them itself.
    image_map = {
        quoted_alt[1:-1]: quoted_href
        for quoted_href, quoted_alt in _ALBUM_PATTERN.findall(content)
    }
    for item in image_map.items():
        image_category_response(item, basePath)
# NOTE(review): the original code referenced a page limit and an image
# pattern whose definitions were missing from the source. Both values below
# are assumptions and must be confirmed against the live site.
_MAX_ALBUM_PAGES = 100
_MAIN_IMAGE_PATTERN = re.compile(
    r"""<div\s+class=["']main-image["'].*?<img\b[^>]*?\bsrc=["']([^"']+)["']""",
    re.S,
)


def image_category_response(item, basePath):
    """Download the images of one album into a folder named after it.

    Pages ``<album url>/0``, ``<album url>/1``, ... are fetched in order and
    the first image found on each page is saved. The walk stops at the first
    page that cannot be fetched or that contains no image.

    Args:
        item: ``(album name, quoted album URL)`` pair. The URL still carries
            its surrounding quote characters, which are stripped here.
        basePath: Directory in which the album folder is created.
    """
    alt = item[0]
    save_folder = os.path.join(basePath, alt)
    os.makedirs(save_folder, exist_ok=True)
    # Drop the quote character on each side of the captured link.
    baseurl = item[1][1:-1]

    for pageIndex in range(_MAX_ALBUM_PAGES):
        page_url = baseurl + "/" + str(pageIndex)
        try:
            request = urllib.request.Request(url=page_url, headers=headers)
            with urllib.request.urlopen(request) as response:
                content = response.read().decode()
            match = _MAIN_IMAGE_PATTERN.search(content)
            if match is None:
                print("no image found on {0}".format(page_url))
                break
            download_images(match.group(1), save_folder)
        except urllib.error.URLError:
            # A failing request is how the end of the album is detected.
            print("最大页面数{0}".format(pageIndex - 1))
            break
def download_images(url, save_path):
    """Download one image and write it into ``save_path``.

    Args:
        url: Address of the image.
        save_path: Existing directory that receives the file.

    Raises:
        urllib.error.URLError: If the image cannot be fetched.
        OSError: If the target file cannot be written.
    """
    request = urllib.request.Request(url=url, headers=headers)
    # Fetch first, so a failed request does not leave an empty file behind.
    with urllib.request.urlopen(request) as response:
        data = response.read()
    # NOTE(review): the line that defined ``filename`` was missing from the
    # source; the last segment of the URL path is assumed to be the name.
    filename = os.path.basename(urllib.parse.urlsplit(url).path)
    with open(os.path.join(save_path, filename), 'wb') as fb:
        fb.write(data)
def create_folder(pageIndex):
    """Create the folder for one listing page if needed and return its path.

    Args:
        pageIndex: Page number; used as the folder name below ``path``.

    Returns:
        The folder path with backslashes replaced by forward slashes and a
        trailing ``/`` appended.
    """
    save_path = os.path.join(path, str(pageIndex))
    os.makedirs(save_path, exist_ok=True)
    return save_path.replace("\\", "/") + "/"


def main():
    """Prompt for a page range and download every album on those pages."""
    url = 'https://www.mzitu.com/xinggan/page/'
    start_page = int(input("请输入起始页码:"))
    end_page = int(input("请输入结束页码:"))
    # Create the root folder.
    os.makedirs(path, exist_ok=True)
    for pageIndex in range(start_page, end_page + 1):
        print("...........开始下载第{0}页".format(pageIndex))
        save_path = create_folder(pageIndex)
        request = handler_request(url, pageIndex)
        with urllib.request.urlopen(request) as response:
            content = response.read().decode()
        get_images_url(content, save_path)
        print("...........结束下载第{0}页".format(pageIndex))


if __name__ == "__main__":
    main()