# 下面用的是普通的下载,因为图片过多,所以下载的速度可能会慢一点。
# 1、通过https://apps.game.qq.com/cgi-bin/ams/module/ishow/V1.0/query/workList_inc.cgi?activityId=2735&sVerifyCode=ABCD&sDataType=JSON&iListNum=20&totalpage=0&page=0&iOrder=0&iSortNumClose=1&jsoncallback=jQuery171015387338790761063_1587884222210&iAMSActivityId=51991&_everyRead=true&iTypeId=2&iFlowId=267733&iActId=2735&iModuleId=2735&_=1587884222349
# 可以获取到高清壁纸的url
# 2、获取高清壁纸的url后,通过parse.unquote可以进行解码,然后将最后的200改为0,就可以得到真实的高清壁纸的图片了
# 3.获取图片的url的地址中有一个参数page,通过修改page的值,可以进行翻页,默认page是从0开始的
import requests
from urllib import parse
from urllib import request
import os
# Collects one record per image link that could not be downloaded.
un_download = []

# Request headers passed to the listing-page requests: a desktop Chrome
# user agent plus the wallpaper page as referer.
headers = {
    'user-agent': (
        'Mozilla/5.0 (Windows NT 10.0; Win64; x64) '
        'AppleWebKit/537.36 (KHTML, like Gecko) '
        'Chrome/79.0.3945.88 Safari/537.36'
    ),
    'referer': 'https://pvp.qq.com/web201605/wallpaper.shtml',
}
# Collect information about an image whose (high-resolution) link failed.
def un_download_url(image_name, image_url):
    """Build a record describing an image link that could not be downloaded.

    ``image_url`` is expected to be the high-resolution form of the link,
    i.e. the one ending in ``/0``. The record also carries the ``/200`` form
    of the same link. Only the trailing ``/0`` is swapped back: replacing
    every ``/0`` in the string would also rewrite any path segment that
    merely starts with ``0`` and produce a link that never existed.

    Args:
        image_name: Name of the wallpaper the link belongs to.
        image_url: The link that failed to download.

    Returns:
        A dict with the keys ``name`` (``image_name``), ``original_url``
        (``image_url`` with a trailing ``/0`` turned into ``/200``, or
        ``image_url`` unchanged when it has no such suffix) and ``curr_url``
        (``image_url`` as given).
    """
    hd_suffix = '/0'
    if image_url.endswith(hd_suffix):
        original_url = image_url[:-len(hd_suffix)] + '/200'
    else:
        # Nothing to restore; report the link as it is.
        original_url = image_url
    return {
        'name': image_name,
        'original_url': original_url,
        'curr_url': image_url,
    }
# Extract the image links of one wallpaper entry.
def exact_image(data):
    """Return the eight high-resolution image links of one wallpaper entry.

    The links are read from the keys ``sProdImgNo_1`` .. ``sProdImgNo_8`` of
    ``data`` and URL-decoded. A link ending in ``/200`` is turned into its
    high-resolution form by changing that suffix to ``/0``. Only the trailing
    suffix is touched, so a ``/200`` that happens to occur earlier in the
    link (for example a path segment such as ``/2008/``) is left intact.

    Args:
        data: Mapping holding the percent-encoded links under the
            ``sProdImgNo_<n>`` keys.

    Returns:
        A list of eight decoded links, in key order.

    Raises:
        KeyError: If one of the eight ``sProdImgNo_<n>`` keys is missing.
    """
    thumb_suffix = '/200'
    image_urls = []
    for x in range(1, 9):
        image_url = parse.unquote(data['sProdImgNo_%d' % x])
        if image_url.endswith(thumb_suffix):
            image_url = image_url[:-len(thumb_suffix)] + '/0'
        image_urls.append(image_url)
    return image_urls
# Download every image of one wallpaper entry.
def download(image_urls, image_name, dirpath):
    """Save each link in ``image_urls`` as ``<n>.jpg`` inside ``dirpath``.

    Files are numbered from 1 in the order of ``image_urls``. A success
    message is printed for every saved file. Any exception raised while
    handling a link is swallowed; a record built by ``un_download_url`` is
    appended to the module-level ``un_download`` list instead and the loop
    moves on to the next link.

    Args:
        image_urls: Links to fetch.
        image_name: Wallpaper name, used in messages and failure records.
        dirpath: Directory the files are written to.
    """
    for position, link in enumerate(image_urls, start=1):
        try:
            target = os.path.join(dirpath, "%d.jpg" % position)
            request.urlretrieve(link, target)
            print("%s下载完成!" % (image_name + link))
        except Exception:
            # Remember the link that could not be fetched.
            un_download.append(un_download_url(image_name, link))
def _make_unique_dir(dirpath):
    """Create a new directory at ``dirpath``, or at ``dirpath`` plus a number.

    ``dirpath`` is tried first; if it already exists, ``dirpath2``,
    ``dirpath3``, ... are tried until one can be created. Missing parent
    directories are created as well.

    Args:
        dirpath: Preferred directory path.

    Returns:
        The path of the directory that was actually created.
    """
    candidate = dirpath
    suffix = 2
    while True:
        try:
            os.makedirs(candidate)
        except FileExistsError:
            candidate = '%s%d' % (dirpath, suffix)
            suffix += 1
        else:
            return candidate


def main():
    """Download the wallpapers of listing pages 0..21 into ``image/<name>/``.

    For every page the listing URL is requested with the module-level
    ``headers``, the JSON response is parsed and each entry of its ``List``
    is processed: the image links are extracted with ``exact_image``, a
    directory named after the decoded ``sProdName`` (with ``1:1`` removed and
    surrounding whitespace stripped) is created under ``image`` and the
    images are saved into it with ``download``.

    When a directory of that name already exists (duplicate wallpaper names
    or a previous run), a numeric suffix starting at 2 is appended until an
    unused name is found, instead of failing on the third duplicate. The
    ``image`` parent directory is created on demand.

    Returns:
        The number of wallpaper entries processed.
    """
    page_url = 'https://apps.game.qq.com/cgi-bin/ams/module/ishow/V1.0/query/workList_inc.cgi?activityId=2735&sVerifyCode=ABCD&sDataType=JSON&iListNum=20&totalpage=0&page={}&iOrder=0&iSortNumClose=1&iAMSActivityId=51991&_everyRead=true&iTypeId=2&iFlowId=267733&iActId=2735&iModuleId=2735&_=1587884222349'
    file_num = 0
    # Pages 1 to 22 (the ``page`` query parameter is zero-based).
    for i in range(0, 22):
        print("第%d页" % (i + 1))
        resp = requests.get(page_url.format(i), headers=headers)
        result = resp.json()
        datas = result['List']
        for data in datas:
            image_urls = exact_image(data)
            image_name = parse.unquote(data['sProdName']).replace("1:1", "").strip()
            # Folder that receives this wallpaper's images.
            dirpath = _make_unique_dir(os.path.join("image", image_name))
            download(image_urls, image_name, dirpath)
            file_num += 1
    return file_num
# 2、只显示url
# for image_url in image_urls:
# print('='*30)
# print(image_name)
# print(image_url)
# print('='*30)
# Script entry point: run the scrape, then print the failure records
# followed by the number of wallpaper entries returned by main().
if __name__ == '__main__':
    downloaded = main()
    print(un_download)
    print('共下载%d种图片' % downloaded)