"""Download the gallery images found on one page of Toutiao search results.

The script queries the search endpoint once, visits every result that has an
``article_url``, extracts the gallery data embedded in the article page, and
saves each gallery image into the ``download`` directory.
"""
import json
import os
import re
import sys
from urllib import request

import requests

# First page (offset=0, count=20) of results; the keyword is the URL-encoded
# form of "街拍".
url = "https://www.toutiao.com/search_content/?offset=0&format=json&keyword=%E8%A1%97%E6%8B%8D&autoload=true&count=20&cur_tab=1&from=search_tab"
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
}

# Seconds to wait on each requests call so a stalled connection cannot hang
# the crawl indefinitely.
REQUEST_TIMEOUT = 10
DOWNLOAD_DIR = 'download'
# Compiled once instead of on every article. The capture group is the
# argument passed to JSON.parse(...) in the page source.
GALLERY_PATTERN = re.compile(r'gallery: JSON\.parse\((.*)\),')


def _extract_gallery(page_html):
    """Return the gallery dict embedded in *page_html*, or None.

    None is returned when the page carries no ``gallery: JSON.parse(...)``
    snippet or when the captured text cannot be decoded.
    """
    match = GALLERY_PATTERN.search(page_html)
    if match is None:
        return None
    try:
        # The captured text is a JSON string literal whose content is itself
        # JSON, hence the two decoding passes.
        gallery = json.loads(json.loads(match.group(1)))
    except (ValueError, TypeError):
        return None
    return gallery if isinstance(gallery, dict) else None


def _download_gallery(article_url):
    """Save every gallery image of one article; return how many were saved.

    Failures are reported on stderr and skipped so that one bad article or
    image does not abort the remaining downloads.
    """
    try:
        page = requests.get(article_url, headers=headers, timeout=REQUEST_TIMEOUT)
    except requests.RequestException as exc:
        print(f'skipping {article_url}: {exc}', file=sys.stderr)
        return 0

    gallery = _extract_gallery(page.text)
    if gallery is None:
        # No gallery snippet on this page; nothing to download.
        return 0

    saved = 0
    for image in gallery.get('sub_images') or []:
        image_url = image.get('url') if isinstance(image, dict) else None
        if not image_url:
            continue
        # File name = last path segment of the image URL + ".jpg".
        filename = DOWNLOAD_DIR + '/' + image_url.split('/')[-1] + '.jpg'
        try:
            request.urlretrieve(image_url, filename)
        except (OSError, ValueError) as exc:
            # OSError covers URLError and file-system errors; ValueError is
            # raised by urllib for URLs it cannot interpret.
            print(f'failed to download {image_url}: {exc}', file=sys.stderr)
            continue
        saved += 1
    return saved


# The script still runs at module level, exactly as the original did.
response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
response.raise_for_status()
home_json = response.json()
# Each entry describes one search result; tolerate a missing or null "data".
list_data = home_json.get('data') or []

# exist_ok avoids the check-then-create race of exists() + mkdir().
os.makedirs(DOWNLOAD_DIR, exist_ok=True)

for url_dict in list_data:
    # Only results that link to an article can be crawled.
    article_url = url_dict.get('article_url') if isinstance(url_dict, dict) else None
    if article_url:
        _download_gallery(article_url)
爬虫,一页图片
最新推荐文章于 2022-11-24 16:04:44 发布