# 58pic.com (千图网) image downloader.
# Note: only some of the images can be downloaded.
import os.path
import requests
from spider import Spider
def parse_real_url(src_url):
    """Build the download-endpoint URL for a picture URL.

    The text after the last ``'58pic'`` occurrence in ``src_url`` is taken
    (the whole string when ``'58pic'`` does not occur), everything from the
    first ``'!'`` onwards in that text is dropped, and the remainder is
    appended to the fixed download-endpoint prefix.

    Args:
        src_url: Picture URL as found in the listing data.

    Returns:
        The download URL as a string.
    """
    endpoint = 'http://apifont.58pic.com/index.php?c=Download&a=downPng&im=//58pic'
    # rpartition yields ('', '', src_url) when the marker is absent, so the
    # third element is the whole input in that case.
    after_marker = src_url.rpartition('58pic')[2]
    image_path, _, _ = after_marker.partition('!')
    return endpoint + image_path
# Characters that act as path separators or are rejected in file names on
# common platforms; each one is replaced with '_' before a picture title is
# used as part of a file name.
_UNSAFE_FILENAME_CHARS = str.maketrans({ch: '_' for ch in '\\/:*?"<>|\r\n\t'})


def _build_tasks(pic_list, save_dir):
    """Convert one page of picture records into (download_url, save_path) tuples."""
    tasks = []
    for pic in pic_list:
        pic_url = pic['picurl']
        # Pictures whose URL mentions 'freepik' are skipped.
        # NOTE(review): presumably these cannot be fetched through the
        # download endpoint -- confirm.
        if 'freepik' in pic_url:
            continue
        # Only the first 10 characters of the title are kept; the picture id
        # is appended to the file name after them.
        safe_title = pic['title'][:10].translate(_UNSAFE_FILENAME_CHARS)
        save_name = f"{safe_title}_{pic['id']}.jpg"
        save_path = os.path.join(save_dir, save_name)
        download_url = parse_real_url(pic_url)
        print('下载地址:', download_url)
        tasks.append((download_url, save_path))
    return tasks


def run(keyword_pinyin, category, *, max_page=129, thread_num=3, timeout=30):
    """Crawl the 58pic listing pages for a keyword and download the pictures.

    For each listing page the JSON response is fetched, every picture record
    is turned into a ``(download_url, save_path)`` tuple, and the tuples of
    that page are handed to ``Spider`` (imported from the project's ``spider``
    module; its download behaviour is not visible in this file).

    Crawling stops early at the first page that carries no picture list.

    Args:
        keyword_pinyin: Search keyword as it appears in the listing URL.
        category: Name of the directory (relative to the current working
            directory) that the pictures are saved into.
        max_page: Last listing page to request (pages start at 1).
        thread_num: Value passed to ``Spider`` as ``thread_num``.
        timeout: Timeout in seconds for each listing-page request.

    Raises:
        requests.RequestException: If a listing page cannot be fetched or
            the server answers with an HTTP error status.
        ValueError: If a listing response body is not valid JSON.
    """
    url = 'https://www.58pic.com/tupian/{}-852-0-default-0-0-0-0_2_0_0_0_0_0-0-0-0-0-0-0-0-0-0-0-0-{}.html?is_ajax=1&is_new=1'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 5.1; rv:5.0) Gecko/20100101 Firefox/6.1856'
    }
    save_dir = f'./{category}'
    # Make sure the target directory exists before any download is attempted.
    os.makedirs(save_dir, exist_ok=True)

    for page in range(1, max_page + 1):
        print(f'下载第{page}页数据')
        page_url = url.format(keyword_pinyin, page)
        print(page_url)
        # A timeout keeps a stalled connection from hanging the whole crawl.
        resp = requests.get(url=page_url, headers=headers, timeout=timeout)
        resp.raise_for_status()

        try:
            pic_list = resp.json()['data']['pics']
        except (KeyError, TypeError):
            # The expected data -> pics structure is missing from the payload.
            pic_list = None
        if not pic_list:
            print(f'第{page}页没有图片数据,停止下载')
            break

        task_list = _build_tasks(pic_list, save_dir)
        if task_list:
            Spider(task_list=task_list, thread_num=thread_num).run()
if __name__ == '__main__':
    # Script entry point: download pictures for the keyword 'shanmai'
    # (the pinyin spelling of "山脉") into the "山脉" directory.
    keyword_pinyin = 'shanmai'
    run(keyword_pinyin=keyword_pinyin, category="山脉")