# coding:utf-8
import requests
import json
# One-off probe: POST a single search query to the API and print the response
# object, to check that the endpoint answers at all.
url = 'https://www.wgsnchina.cn/api/cherry/search/query'

# "log" and "object_id" are sent as a real JSON boolean / null. The quoted
# strings "false" / "null" would be serialized as JSON strings instead, which
# is not what the paged crawl script in this same file sends.
payload = {
    "filters": {"categories": ["108651"]},
    "q": [],
    "log": False,
    "lang": "cs",
    "params": {
        "limit": 200,
        "offset": 200,
        "object_id": None,
        "sort": [{"field": "add_date"}],
    },
}

# Browser-like request headers. No "content-length" entry is set here:
# requests derives it from the encoded body, so a hard-coded value is
# redundant and goes stale whenever the payload changes.
# No cookie is sent by this probe.
headers = {
    "accept": "application/json, text/plain, */*",
    "accept-encoding": "gzip, deflate, br",
    "accept-language": "zh-CN,zh;q=0.9",
    "content-type": "application/json;charset=UTF-8",
    "referer": "https://www.wgsnchina.cn/library/results/ab745207e8ed3dcfa16b4814748beead",
    "user-agent": "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.181 Safari/537.36",
    "origin": "https://www.wgsnchina.cn",
}

# A timeout keeps the script from hanging forever if the server (or the VPN
# tunnel in front of it) stops responding.
r = requests.post(url, data=json.dumps(payload), headers=headers, timeout=30)
print(r)
1、安装 VPN
https://vip.dhu.edu.cn/prx/000/http/localhost/login/index.html (此链接可忽略)
http://support.arraynetworks.com.cn/troubleshooting/index.html
http://106.15.64.216:8080/zh/troubleshooting (VPN 配置软件的下载地址)
在 Win7 上安装该软件
配置该软件
在配置文件中增加以下内容:
站点名称:东华大学
主机:vip.dhu.edu.cn
用户名:2160771
密码:1993talentZSzzl
连接 VPN
2、访问目标网站(下载图片)
https://www.wgsnchina.cn/library/results/ab745207e8ed3dcfa16b4814748beead
# coding:utf-8
import requests
import json
import os
import sys
# Crawl the search API page by page and write the original-size image URLs of
# each page to a text file under pic/<category>/.
#
# Usage: python <script> [start_file_no]
#   start_file_no  1-based number of the first page/file to fetch (default 1),
#                  so an interrupted run can be resumed from a later page.

SEARCH_URL = 'https://www.wgsnchina.cn/api/cherry/search/query'

# Category to crawl. Other category ids noted alongside the original payload:
# "108651", "31071", "20963", "25003", "951", "2026695", "2026696", "2026693", "56894", "2026694"
CATEGORY_ID = "108651"

# Number of results per request. The offset stride and the ranges encoded in
# the output file names are both based on 10000; the original requested
# limit=20000, so consecutive pages overlapped and every file held up to twice
# the range its name advertises. Using one constant keeps all three in sync.
PAGE_SIZE = 10000

# Upper bound on the number of pages tried; the loop stops earlier as soon as
# a page comes back empty.
MAX_PAGES = 500

# Seconds to wait for the server before giving up on a request.
REQUEST_TIMEOUT = 60

# Browser-like request headers. "content-length" is intentionally not set:
# requests derives it from the encoded body.
# NOTE(review): the cookie looks like it was copied from a logged-in browser
# session and will presumably expire - confirm and refresh before running.
REQUEST_HEADERS = {
    'accept': 'application/json, text/plain, */*',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'zh-CN,zh;q=0.9',
    'content-type': 'application/json;charset=UTF-8',
    'cookie': 'ss_udid=d609dc6936f5834709aa48b78f2903c9; exclusionChecked=True; cp_SubStat=Subscriber; _ga=GA1.2.1896488925.1524827736; cp_UserID=-1; ss_lang=cs; product=WGSN; _gid=GA1.2.2046071290.1525336593; hideCherryBeta=true; ss_token=0b1c4760ce102b9213be80decd699ddd; _gat_UA-1004012-2=1; cp_browStat=Logged In; cp_hybridBrowStat=Logged In; _dc_gtm_UA-1004012-2=1; PHPSESSID=mii44amhdbujt8st38hqfd9uke; trwv.uid=stylesight-1524827733879-924d0fe7%3A4; trwsa.sid=stylesight-1525398337152-70147d26%3A2',
    'origin': 'https://www.wgsnchina.cn',
    'referer': 'https://www.wgsnchina.cn/library/results/ab745207e8ed3dcfa16b4814748beead',
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/66.0.3359.139 Safari/537.36',
}


def _parse_start_file_no(argv):
    """Return the 1-based start file number from *argv* (default 1).

    Raises ValueError if the argument is not an integer and SystemExit if it
    is smaller than 1 (which would otherwise produce negative offsets).
    """
    if len(argv) == 2:
        start = int(argv[1])
    else:
        start = 1
    if start < 1:
        raise SystemExit("start file number must be >= 1, got {}".format(start))
    return start


def _build_payload(category_id, offset):
    """Return the search request body for the page starting at *offset*."""
    return {
        "filters": {"categories": [category_id]},
        "q": [],
        "log": False,
        "lang": "cs",
        "params": {
            "limit": PAGE_SIZE,
            "offset": offset,
            "object_id": None,
            "sort": [{"field": "add_date"}],
        },
    }


def _fetch_original_urls(payload):
    """POST *payload* to the search API and return the original-size image URLs.

    Raises requests.HTTPError on a non-2xx response instead of trying to parse
    an error page as search results.
    """
    response = requests.post(
        SEARCH_URL,
        data=json.dumps(payload),
        headers=REQUEST_HEADERS,
        timeout=REQUEST_TIMEOUT,
    )
    response.raise_for_status()
    results = response.json()['data']['results']
    # Each result carries a "media_" detail URL; the script rewrites it to the
    # "original_" variant of the same path.
    return [item["urls"]["detail"].replace('media_', 'original_') for item in results]


def _url_list_path(category_id, page_index):
    """Return the output path for page *page_index* (0-based), creating its directory."""
    pathname = "pic" + "/" + category_id
    os.makedirs(pathname, exist_ok=True)
    first = page_index * PAGE_SIZE + 1
    last = (page_index + 1) * PAGE_SIZE
    return "{}/{}_list_{}_{}-{}.txt".format(
        pathname, category_id, page_index + 1, first, last)


def _download_images(urls, directory):
    """Download every URL in *urls* into *directory*.

    Not called by default: the original script had this step disabled and only
    saved the URL lists.
    """
    os.makedirs(directory, exist_ok=True)
    download_headers = {'user-agent': REQUEST_HEADERS['user-agent']}
    for image_url in urls:
        pic_r = requests.get(image_url, headers=download_headers, timeout=REQUEST_TIMEOUT)
        pic_r.raise_for_status()
        with open(directory + "/" + os.path.basename(image_url), 'wb') as f:
            f.write(pic_r.content)


# Check whether a start file number was given on the command line.
file_No = _parse_start_file_no(sys.argv)

for i in range(file_No - 1, MAX_PAGES):
    payload = _build_payload(CATEGORY_ID, i * PAGE_SIZE)
    url_original_list = _fetch_original_urls(payload)
    print(len(url_original_list))
    # An empty page means the result set is exhausted: stop crawling.
    if not url_original_list:
        break
    # Write this page's URLs, one per line.
    filename = _url_list_path(CATEGORY_ID, i)
    print(filename)
    # Explicit encoding so the output does not depend on the platform default.
    with open(filename, 'w', encoding='utf-8') as f:
        f.writelines(url_one + "\n" for url_one in url_original_list)
300 万条 URL(下载地址):
https://download.csdn.net/download/u013547552/10400374
参数: