# 目录
# 引用包
import requests
import json
import os
# 获取网页源码
# Fetch the search results for the page.
def login(url):
    """Request the search API at *url* and return the decoded JSON body.

    The request is sent through the module-level session ``s``, which must
    exist before this function is called (the script creates it in its
    ``__main__`` section).

    Args:
        url: Endpoint to send the search request to.

    Returns:
        The decoded JSON response on success, or the empty string ``""``
        when the request fails, the server answers with an error status,
        or the body is not valid JSON. The error is printed in that case.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (iPhone; CPU iPhone OS 13_2_3 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/13.0.3 Mobile/15E148 Safari/604.1',
        'Cookie': ''
    }
    params = {
        'aid': '24',
        'app_name': 'web_search',
        'offset': '0',
        'format': 'json',
        'keyword': '中国',
        'autoload': 'true',
        'count': '20',
        'en_qc': '1',
        'cur_tab': '1',
        'from': 'search_tab',
        'pd': 'synthesis',
        'timestamp': '1591782225749'
    }
    try:
        r = s.post(url, headers=headers, params=params, timeout=10)
        r.raise_for_status()
        # Force UTF-8 so r.text is decoded consistently before parsing.
        r.encoding = 'utf-8'
        return json.loads(r.text)
    except (requests.RequestException, ValueError) as e:
        # Only network/HTTP failures and JSON decode errors (ValueError) are
        # treated as "no result"; programming errors are left to propagate.
        print(e)
        return ""
# 将json中的每个图集的url找出来
# Collect every image URL found in the search-result JSON.
def get_data(dic):
    """Extract image URLs from a decoded search response.

    Args:
        dic: Decoded JSON response. Expected to be a dict whose ``'data'``
            entry is a list of result items (dicts). Anything else, such
            as the ``""`` that ``login`` returns on failure, yields an
            empty result instead of raising.

    Returns:
        A list of URLs in item order. For each item the values are taken
        in this order: ``avatar_url``, every ``url`` inside ``image_list``,
        ``image_url``, ``large_image_url``. The list is also printed.
    """
    img_list = []
    # A missing or null 'data' entry, or a non-dict response, means there
    # are simply no items.
    items = dic.get('data') if isinstance(dic, dict) else None
    for item in items or []:
        if not isinstance(item, dict):
            continue
        if 'avatar_url' in item:
            img_list.append(item['avatar_url'])
        for img in item.get('image_list') or []:
            # Skip malformed entries that carry no 'url'.
            if isinstance(img, dict) and 'url' in img:
                img_list.append(img['url'])
        if 'image_url' in item:
            img_list.append(item['image_url'])
        if 'large_image_url' in item:
            img_list.append(item['large_image_url'])
    print(img_list)
    return img_list
# 保存图片到本地
# Save the images to local disk.
def search_img(data):
    """Download each image URL in *data* into the ``..//pics/`` directory.

    ``'.jpg'`` is appended to every URL before it is requested, and the last
    path segment of the resulting URL is used as the local file name. Files
    that already exist are not downloaded again. A download that fails is
    reported and skipped so the remaining images are still processed.

    Args:
        data: Iterable of image URL strings.
    """
    root = "..//pics/"
    for img_url in data:
        url = img_url + '.jpg'
        print(url)
        path = root + url.split('/')[-1]
        # makedirs also creates missing parents and tolerates an existing dir.
        os.makedirs(root, exist_ok=True)
        if os.path.exists(path):
            print("文件已存在")
            continue
        try:
            r = requests.get(url, timeout=10)
            # Do not store an HTTP error page as if it were an image.
            r.raise_for_status()
        except requests.RequestException as e:
            print(e)
            continue
        with open(path, 'wb') as f:
            f.write(r.content)
        print("文件保存成功")
# 总函数
if __name__ == '__main__':
    # Search API endpoint.
    login_url = 'https://www.toutiao.com/api/search/content/?'
    # login() reads the module-level name `s`, so the session must be bound
    # to exactly that name. The context manager closes it when done.
    with requests.session() as s:
        dic_data = login(login_url)
    # login() returns "" when the request failed; there is nothing to
    # extract or download in that case.
    if dic_data:
        data = get_data(dic_data)
        search_img(data)