爬取以下内容:
代码:
import requests
import bs4
import re
import json
# List endpoint (method=getXkzsList); the response is decoded as JSON below.
url = 'http://scxk.nmpa.gov.cn:81/xk/itownet/portalAction.do?method=getXkzsList'
# Form fields posted with the list query.
data = {
    'on': 'true',
    'page': '1',
    'pageSize': '15',
    'productName': '广州',
    'conditionType': '2',
    'applyname': '',
    'applysn': '',
}
# Browser-like User-Agent sent with the request (UA spoofing).
# NOTE: the name is misspelled ("hearders") but is kept as-is because the
# per-ID detail requests later in this script refer to it.
hearders = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.88 Safari/537.36 Edg/87.0.664.60'
}
# POST the query and decode the JSON body.
# The timeout keeps the script from blocking forever on an unresponsive server.
yjj = requests.post(url=url, data=data, headers=hearders, timeout=30)
yjj_json = yjj.json()
# Save the decoded response; the context manager guarantees the file is
# flushed and closed even if json.dump raises.
with open('yjj.json', 'w', encoding='utf-8') as fp:
    json.dump(yjj_json, fp=fp, ensure_ascii=False)
# Collect the 'ID' field of every entry under the response's 'list' key.
id_list = [entry['ID'] for entry in yjj_json['list']]
# Print the IDs four per line: tab-separated, with a newline after every
# fourth one (counting from 1 so that positions 4, 8, 12, ... end a line).
for position, entry_id in enumerate(id_list, start=1):
    print(entry_id, end='\n' if position % 4 == 0 else '\t')
# Detail endpoint (method=getXkzsById); queried once per collected ID.
detail_url = 'http://scxk.nmpa.gov.cn:81/xk/itownet/portalAction.do?method=getXkzsById'
# Decoded detail responses, in the same order as id_list.
all_datail = []
for record_id in id_list:  # named record_id so the builtin id() is not shadowed
    detail = requests.post(
        url=detail_url,
        data={'id': record_id},
        headers=hearders,
        timeout=30,  # do not hang forever on an unresponsive server
    )
    all_datail.append(detail.json())
# Write every detail record as one JSON array; the context manager guarantees
# the file is flushed and closed even if json.dump raises.
with open('detail.json', 'w', encoding='utf-8') as fp:
    json.dump(all_datail, fp=fp, ensure_ascii=False)
结果: