import requests, pprint, json, time, os
def get_text(url, offset, timeout=10):
    """Fetch one page of search results for the keyword "NBA".

    Sends a GET request to ``url`` with browser-like headers (a captured
    toutiao.com search session) and a fixed set of query parameters.

    Args:
        url: Search API endpoint to query.
        offset: Page index; it is sent to the server as ``offset * 10``.
        timeout: Seconds to wait for the server before giving up, so a
            stalled connection cannot hang the crawl forever.

    Returns:
        The value of the ``data`` field of the JSON response (presumably a
        list of result dicts -- confirm against the API), or ``''`` when the
        request fails, the body is not valid JSON, or ``data`` is missing or
        null.
    """
    headers = {
        'accept': 'application/json, text/javascript',
        # NOTE(review): 'br' is only decoded when a brotli package is
        # installed alongside requests/urllib3 -- confirm in the target env.
        'accept-encoding': 'gzip, deflate, br',
        'accept-language': 'zh-CN,zh;q=0.9',
        'content-type': 'application/x-www-form-urlencoded',
        # Captured session cookie; presumably it expires -- TODO confirm.
        'cookie': 'tt_webid=6683811020840027651; '
                  'WEATHER_CITY=%E5%8C%97%E4%BA%AC;'
                  ' UM_distinctid=16a5483707851e-04cbb1ed61c80b-43450521-1fa400-16a548370791d1;'
                  ' CNZZDATA1259612802=1989342800-1556194055-null%7C1556194055; '
                  'tt_webid=6683811020840027651; '
                  '__tasessionId=mhruz58ty1556196061614;'
                  ' csrftoken=50fbd6300ac3c835c93ab56776f11014; '
                  's_v_web_id=e4bb2fa8bd1d669b46a054a3e5eb3f62',
        'referer': 'https://www.toutiao.com/search/?keyword=NBA',
        'user-agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko)'
                      ' Chrome/69.0.3497.100 Safari/537.36',
        'x-requested-with': 'XMLHttpRequest',
    }
    params = {
        'aid': '24',
        'app_name': 'web_search',
        # NOTE(review): the step is 10 while 'count' is 20; if the server
        # honours both, consecutive pages may overlap -- verify.
        'offset': offset * 10,
        'format': 'json',
        'keyword': 'NBA',
        'autoload': 'true',
        'count': '20',
        'en_qc': '1',
        'cur_tab': '1',
        'from': 'search_tab',
        'pd': 'synthesis',
        'timestamp': '1556196114931',
    }
    try:
        r = requests.get(url=url, headers=headers, params=params, timeout=timeout)
        r.encoding = r.apparent_encoding
        data = json.loads(r.text)['data']
    except (requests.RequestException, ValueError, KeyError, TypeError):
        # Best-effort fetch: network errors, non-JSON bodies (JSONDecodeError
        # is a ValueError) and unexpected payload shapes all yield the empty
        # sentinel. Unlike a bare ``except``, Ctrl-C still propagates.
        return ''
    # A literal ``"data": null`` would otherwise hand callers a None that
    # they cannot iterate; fold it into the same empty sentinel.
    return '' if data is None else data
def get_urls(i):
    """Return the first image URL found in a search-result entry.

    Args:
        i: A mapping for one search result. Its optional ``'image_list'``
            value is expected to be a sequence of mappings that each may
            carry a ``'url'`` key.

    Returns:
        The first non-empty ``'url'`` value in ``i['image_list']``, or
        ``None`` when the entry has no ``'image_list'``, the list is empty
        or null, or none of its items has a usable URL.
    """
    # NOTE(review): the file's indentation was lost, so whether the loop was
    # meant to yield the first or the last URL is unclear; this returns the
    # first one -- confirm with the author.
    for image in i.get('image_list') or []:
        link = image.get('url')
        if link:
            return link
    return None
def save(b, path="C://Users/Administrator/Desktop/mm.txt"):
    """Append one line of text to the output file.

    Args:
        b: The text to record; a trailing newline is added.
        path: File to append to. Defaults to the original hard-coded
            desktop location so existing ``save(b)`` calls are unaffected.

    Raises:
        OSError: If the file cannot be opened for appending.
    """
    # Pin the encoding so the output does not depend on the platform's
    # default code page.
    with open(path, "a", encoding="utf-8") as f:
        f.write(b + "\n")
if __name__ == '__main__':
    # Walk nine result pages and append one image URL per result entry
    # (when the entry yields one) to the output file.
    url = "https://www.toutiao.com/api/search/content/"
    for offset in range(9):
        ret = get_text(url, offset)
        # Guard against a falsy page result (e.g. '' or None) so a bad page
        # is skipped instead of raising while iterating.
        for i in ret or []:
            b = get_urls(i)
            if b is not None:
                save(b)
结果展示:
思路: