from time import sleep
import requests
# Scrape a paginated news listing by POSTing one form request per page and
# printing the title, absolute link and date of every item returned.

# Endpoint that is queried (via POST) once for each page.
url = 'https://hl.122.gov.cn/m/page/news/getDetails'
# Browser-like User-Agent header sent with every request.
headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36'}

# Pages are numbered from 1; range(1, 451) covers pages 1 through 450.
for i in range(1, 451):
    print('第%s页数据如下:' % i)
    # Form fields required by the POST request.
    # NOTE(review): 'size' is presumably the number of items per page and
    # 'path' the news category -- confirm against the site.
    data = {
        'path': 'jgdt',
        'page': i,
        'size': '10',
    }
    # requests applies no timeout by default, so without one a stalled
    # connection would hang the whole run.
    reply = requests.post(url=url, headers=headers, data=data, timeout=30)
    # Fail loudly on an HTTP error status instead of trying to parse an
    # error page as JSON.
    reply.raise_for_status()
    # .json() returns the body parsed into a dict. .text would return the raw
    # string instead; parse that with a JSON parser, never with eval(), since
    # the body is untrusted remote data.
    response = reply.json()
    # The items of the current page are nested under data -> pageList -> list.
    for item in response['data']['pageList']['list']:
        record = {
            'title': item['title'],
            # 'link' is joined onto the site root to form the full URL.
            'href': 'https://hl.122.gov.cn/' + item['link'],
            'time': item['sortDate'],
        }
        print(record)
    # Pause before requesting the next page.
    # NOTE(review): the original paste lost its indentation, so whether this
    # delay was meant per page or per item is an assumption -- confirm.
    sleep(5)
# Scraping jiaoguan 12123 web page data (POST method)
# (Blog page footer: latest recommended article published on 2025-01-05 11:11:16)