# urlencode主要是百分号编码
# HTTPAdapter主要是在session实例上挂载Adapter实例, 目的: 请求异常时,自动重试
# 可以修改keyword 进行下载其他关键字的图片
import json
import os
import urllib
from urllib.parse import urlencode
import requests
import time
from requests.adapters import HTTPAdapter
# from requests.api import get
# from requests.models import Response
#https://www.helloworld.net/p/1580796665
# Retry failed connections up to 3 times.
#
# Assigning DEFAULT_RETRIES on its own does not turn retries on: HTTPAdapter
# treats "max_retries == DEFAULT_RETRIES" as "caller asked for the default"
# and falls back to zero retries. The adapter is therefore built *before*
# the constant is changed, so that max_retries=3 is honoured, and is then
# mounted on the session explicitly. The assignment itself is kept so the
# module-level side effect stays the same for any other code relying on it.
_retry_adapter = HTTPAdapter(max_retries=3)
requests.adapters.DEFAULT_RETRIES = 3
s = requests.session()
# NOTE(review): requests does not appear to read a ``keep_alive`` attribute
# on Session objects; kept unchanged in case other code inspects it.
s.keep_alive = False
# Route both schemes through the retrying adapter.
s.mount('http://', _retry_adapter)
s.mount('https://', _retry_adapter)
def get_images(re_json):
    """Yield the image URLs contained in one search-result payload.

    This is a generator. It reads ``re_json['rawData']['data']`` and yields
    the ``img_url`` value of every entry that has one, printing each URL
    as it goes.

    Args:
        re_json: Decoded JSON response (a dict), or ``None`` when the page
            could not be fetched.

    Yields:
        Each non-empty ``img_url`` value, in payload order.

    A missing page, a missing ``rawData``/``data`` level or a malformed
    entry does not raise: the error marker is printed and iteration stops.
    """
    print('111111111111111111111')
    print('222222222222222222222')
    try:
        # The lookup lives inside the try so that a None page or a missing
        # 'rawData' level is reported like any other malformed payload
        # instead of raising AttributeError in the caller's loop.
        images = re_json.get('rawData').get('data')
        for image in images:
            link = image.get('img_url')
            if not link:
                # Entries without a URL cannot be downloaded; skip them
                # rather than handing None to the consumer.
                continue
            print(link)
            yield link
    except Exception:
        # Deliberate best effort: report and stop on a malformed payload.
        print("-----------出错继续----------")
def get_page(page_num, keyword='比基尼'):
    """Fetch one page of Toutiao image-search results as decoded JSON.

    Args:
        page_num: Result page index, sent as the ``page_num`` parameter.
        keyword: Search term sent as the ``keyword`` parameter. Defaults to
            the value that used to be hard-coded here.

    Returns:
        The decoded JSON body when the server answers with HTTP 200.
        ``None`` for any other status, for a network error or timeout, or
        when the body is not valid JSON.

    Side effects:
        Rebinds the module-level ``headers`` dict, prints the request URL
        and closes the shared session ``s`` after the request.
    """
    # Example of the browser request being imitated:
    # https://so.toutiao.com/search?keyword=街拍&pd=atlas&dvpf=pc&aid=4916&page_num=0&search_json={"from_search_id":"202107261607330102121920454A86E71D","origin_keyword":"街拍","image_keyword":"街拍"}
    global headers  # kept module-level for backward compatibility
    # NOTE(review): the Cookie below is a hard-coded browser session value;
    # presumably it expires and should eventually come from configuration.
    headers = {
        'Connection': 'keep-alive',
        'Pragma': 'no-cache',
        'Cache-Control': 'no-cache',
        'sec-ch-ua': '" Not;A Brand";v="99", "Google Chrome";v="91", "Chromium";v="91"',
        'sec-ch-ua-mobile': '?0',
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.77 Safari/537.36',
        'Accept': '*/*',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Dest': 'empty',
        'Referer': 'https://so.toutiao.com/search?keyword=%E8%A1%97%E6%8B%8D&pd=atlas&dvpf=pc&aid=4916&page_num=0&search_json={%22from_search_id%22:%22202107261607330102121920454A86E71D%22,%22origin_keyword%22:%22%E8%A1%97%E6%8B%8D%22,%22image_keyword%22:%22%E8%A1%97%E6%8B%8D%22}',
        'Accept-Language': 'zh-CN,zh;q=0.9,ja-CN;q=0.8,ja;q=0.7',
        'Cookie': 'passport_csrf_token=e12bf88609ec09f7b050695177292c19; ttwid=1%7CtKUX3oTeFsZd_HWDBF0xNAl1uCb6fAfHEskBGp4mgac%7C1627286233%7C5f3b5a0fa02e9a26deac7c709209b5516591d760cf3b0b9d6ce2163e41febfc0; _S_DPR=1; _S_IPAD=0; MONITOR_WEB_ID=6969408645571298846; SL_GWPT_Show_Hide_tmp=1; SL_wptGlobTipTmp=1; _S_WIN_WH=230_637; ttwid=1%7CtKUX3oTeFsZd_HWDBF0xNAl1uCb6fAfHEskBGp4mgac%7C1627286233%7C5f3b5a0fa02e9a26deac7c709209b5516591d760cf3b0b9d6ce2163e41febfc0'
    }
    params = {
        'keyword': keyword,
        'pd': 'atlas',
        'dvpf': 'pc',
        'aid': 4916,
        'page_num': page_num,
        # Stored percent-encoded; decoded here so urlencode() below encodes
        # it exactly once.
        'search_json': urllib.parse.unquote("%7B%22from_search_id%22%3A%22202107261607330102121920454A86E71D%22%2C%22origin_keyword%22%3A%22%E8%A1%97%E6%8B%8D%22%2C%22image_keyword%22%3A%22%E8%A1%97%E6%8B%8D%22%7D"),
        'rawJSON': 1,
        'search_id': '2021072616100601015005901629CDDE2E'
    }
    url = 'https://so.toutiao.com/search?' + urlencode(params)
    print(url)
    try:
        # The query string is already part of ``url``. Passing ``params``
        # as well would make requests append every parameter a second time.
        # The timeout keeps a stalled connection from hanging the crawl.
        response = s.get(url=url, headers=headers, timeout=10)
    except requests.RequestException:
        # Covers connection errors (as before) plus timeouts and similar.
        return None
    finally:
        s.close()
    if response.status_code != 200:
        return None
    try:
        return response.json()
    except ValueError:
        # Body was not valid JSON (e.g. an HTML anti-bot page).
        return None
def saving_img(link):
    """Download one image and write it into the ``./image1`` directory.

    The file is named after the last path component of the URL; the query
    string and fragment are left out of the name. A download that fails
    (network error, timeout or HTTP error status) is reported on stdout
    and skipped, so a single bad link does not abort the whole crawl.

    Args:
        link: URL of the image to download.
    """
    print(f'-------正在下载第{link}张图片')
    # Use only the URL path for the name so "?..." never ends up in it.
    filename = os.path.basename(urllib.parse.urlsplit(link).path)
    try:
        response = requests.get(link, timeout=10)
        # Avoid saving an HTML error page under an image file name.
        response.raise_for_status()
    except requests.RequestException as err:
        print(f'-------下载失败: {link} ({err})')
        return
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("./image1", exist_ok=True)
    with open(f'./image1/{filename}', 'wb') as f:
        f.write(response.content)
    # Pause between downloads to throttle requests to the image host.
    time.sleep(1)
def main(page_num):
    """Download every image listed on one search-result page.

    Args:
        page_num: Result page index passed on to ``get_page``.
    """
    re_json = get_page(page_num)
    if re_json is None:
        # get_page() returns None when the request failed or the server
        # did not answer with HTTP 200; there is nothing to download.
        return
    for link in get_images(re_json):
        saving_img(link)
if __name__ == '__main__':
    # Crawl result pages 0 through 99 one after another, then report
    # completion once.
    for page_index in range(100):
        main(page_index)
    print('-------------结束下载----------')