# Code listing (original blog caption: "上代码 / 这里写代码片", i.e. "here is the code / code snippet goes here").
import re,json,requests,os
from urllib import request
# Browser-like request headers sent with every search and article request.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.26 Safari/537.36 Core/1.63.5702.400 QQBrowser/10.2.1893.400'
}

# Search endpoint; the keyword is the URL-encoded form of "街拍" and each
# request asks for 20 results starting at ``offset``.
SEARCH_URL_TEMPLATE = (
    'https://www.toutiao.com/search_content/?offset={offset}'
    '&format=json&keyword=%E8%A1%97%E6%8B%8D&autoload=true&count=20&cur_tab=3&from=gallery'
)

# Matches the ``gallery: JSON.parse("...")`` assignment embedded in an
# article page; group 1 is the JSON string literal passed to JSON.parse.
GALLERY_RE = re.compile(r'gallery: JSON\.parse\((.*)\),')

# Directory the downloaded images are written to.
OUTPUT_DIR = 'image1'

# Seconds to wait on the search and article requests before giving up.
REQUEST_TIMEOUT = 10


def search_url(offset):
    """Return the search-results URL for the given result offset."""
    return SEARCH_URL_TEMPLATE.format(offset=offset)


def extract_image_urls(page_html):
    """Return the image URLs listed in an article page's gallery data.

    Returns an empty list when the page contains no ``gallery:
    JSON.parse(...)`` assignment.  Raises ``ValueError`` if the embedded
    payload is not valid JSON, and ``KeyError``/``TypeError`` if it does
    not have the expected ``sub_images`` / ``url`` structure.
    """
    match = GALLERY_RE.search(page_html)
    if match is None:
        return []
    # The argument of JSON.parse is a JSON *string literal* whose content
    # is itself JSON, hence the two decoding passes.
    gallery = json.loads(json.loads(match.group(1)))
    return [image['url'] for image in gallery['sub_images']]


def image_filename(image_url, out_dir=OUTPUT_DIR):
    """Return the local path for an image: last URL segment plus ``.jpg``."""
    return os.path.join(out_dir, image_url.split('/')[-1] + '.jpg')


def download_article_images(article_url):
    """Fetch one article page and download every image in its gallery."""
    response = requests.get(article_url, headers=headers, timeout=REQUEST_TIMEOUT)
    for image_url in extract_image_urls(response.text):
        request.urlretrieve(image_url, image_filename(image_url))


def main():
    """Download gallery images for the first three pages of search results.

    A failure while fetching a search-results page propagates to the
    caller.  A failure inside a single article is reported and only that
    article is skipped, so the remaining articles on the page are still
    processed.
    """
    # urlretrieve does not create missing directories.
    os.makedirs(OUTPUT_DIR, exist_ok=True)

    for offset in range(0, 60, 20):
        req = request.Request(search_url(offset), headers=headers)
        with request.urlopen(req, timeout=REQUEST_TIMEOUT) as response:
            results = json.loads(response.read().decode('utf-8'))

        for entry in results.get('data') or []:
            article_url = entry.get('article_url')
            if not article_url:
                # Entries without an article link cannot be crawled.
                continue
            try:
                download_article_images(article_url)
            except (OSError, ValueError, KeyError, TypeError) as err:
                # OSError covers network errors from both urllib and
                # requests (their exception classes derive from it) and
                # file-system errors; ValueError covers JSON decoding.
                print('skipping {}: {!r}'.format(article_url, err))


if __name__ == '__main__':
    main()