import requests
def get_page():
    """Fetch several pages of Toutiao search results and print each entry.

    Requests the search API once for every ``count`` value in
    ``range(20, 120, 20)`` and hands each successful (HTTP 200) JSON
    response to ``parse``. Responses with any other status code are
    skipped. Paging stops at the first request that fails at the network
    level (connection error, timeout, ...).

    Returns:
        None.
    """
    # The original literal ended in "pd=synthesis×tamp=...": the "&times"
    # prefix of "&timestamp" had apparently been converted into the "×"
    # character, which merged the ``pd`` and ``timestamp`` query parameters.
    url_template = (
        'https://www.toutiao.com/api/search/content/'
        '?aid=24&app_name=web_search&offset=40&format=json'
        '&keyword=%E7%BE%8E%E5%A5%B3&autoload=true&count={}'
        '&en_qc=1&cur_tab=1&from=search_tab&pd=synthesis'
        '&timestamp=1554722202686'
    )
    # NOTE(review): only ``count`` varies while ``offset`` stays at 40 --
    # confirm this is the paging scheme the API actually expects.
    for count in range(20, 120, 20):
        # Build the URL for this page.
        url = url_template.format(count)
        try:
            # A timeout keeps one stalled connection from hanging the script.
            res = requests.get(url, timeout=10)
        except requests.RequestException:
            # Covers ConnectionError as before, plus the Timeout that the
            # new ``timeout`` argument can raise; give up on further pages.
            return None
        if res.status_code == 200:
            parse(res.json())
def parse(json):
    """Print the title and image URL of every entry in a search response.

    Args:
        json: Decoded response body; it is read with ``get``. When its
            ``'data'`` value is truthy, that value is iterated and each
            item is read with ``get`` as well.

    Returns:
        None. One line per entry is written to standard output; a missing
        ``'title'`` or ``'image_url'`` is printed as ``None``.
    """
    entries = json.get('data')
    if not entries:
        # Nothing to report for a missing, empty or null 'data' field.
        return
    for entry in entries:
        print(entry.get('title'), entry.get('image_url'))
def main():
    """Script entry point: run the page fetcher once."""
    get_page()
# Run the scraper only when this file is executed as a script.
if __name__ == "__main__":
    main()
# 今日头条的网页数据也是异步加载的。搜索:数据,按 F12,向下翻页,在 XHR 中查找 Ajax 请求,果然找到了它。
# 网页规律:url 中的 count 按 20、40、60 依次变化,一个 for 循环就能搞定;其他的就和普通网页一样了,而且 json 格式的网页更好解析。
# 代码摘录:import requests / def get_page(): / for i in range(20,120,20): / #构建url ...