import requests
import json
def build_list_params(page: int) -> dict:
    """Return the form payload requesting one page of the licence listing.

    Args:
        page: Page number to request; it is sent to the server as a string.

    Returns:
        The form fields posted to the ``getXkzsList`` endpoint.
    """
    return {
        'on': 'true',
        'page': str(page),
        'pageSize': '15',
        'productName': '',
        'conditionType': '1',
        'applyname': '',
        'applysn': '',
    }


def format_company(info: dict) -> str:
    """Build the one-line console summary for a company detail record.

    A missing or null field is rendered as an empty string so that a single
    incomplete record cannot abort the whole crawl.

    Args:
        info: Decoded JSON object returned by the ``getXkzsById`` endpoint.

    Returns:
        The summary line containing company name, legal person and address.
    """
    return '公司名称:{} 法人: {}地址:{}'.format(
        info.get('epsName') or '',
        info.get('legalPerson') or '',
        info.get('epsAddress') or '',
    )


def post_json(session, url: str, payload: dict, headers: dict, timeout: float = 10):
    """POST form data through ``session`` and return the decoded JSON body.

    Args:
        session: A ``requests.Session`` (or any object with a compatible
            ``post`` method).
        url: Endpoint to post to.
        payload: Form fields sent as the request body.
        headers: HTTP headers sent with the request.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        The response body parsed as JSON.

    Raises:
        requests.HTTPError: If the server answers with a 4xx/5xx status.
    """
    response = session.post(url, data=payload, headers=headers, timeout=timeout)
    # Fail loudly on an error page instead of tripping over it in .json().
    response.raise_for_status()
    return response.json()


if __name__ == '__main__':
    headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.122 Safari/537.36'
    }
    list_url = 'http://125.35.6.84:81/xk/itownet/portalAction.do?method=getXkzsList'
    detail_url = 'http://125.35.6.84:81/xk/itownet/portalAction.do?method=getXkzsById'
    id_list = []  # IDs of every company found on the listing pages
    info_list = []  # detail objects of every company

    # One session reuses the underlying connection across all requests.
    with requests.Session() as session:
        # Pass 1: walk the listing pages and collect the company IDs.
        for page in range(1, 137):
            data_obj = post_json(session, list_url, build_list_params(page), headers)
            id_list.extend(obj['ID'] for obj in data_obj['list'])

        # Pass 2: fetch the detail record for each collected ID.
        for company_id in id_list:
            cur_obj = post_json(session, detail_url, {'id': company_id}, headers)
            print(format_company(cur_obj))
            info_list.append(cur_obj)

    # To persist the results, uncomment the following lines:
    # with open("食药监局所有企业的详细信息.json", 'w', encoding='utf-8') as fp:
    #     json.dump(info_list, fp=fp, ensure_ascii=False)
    print("爬取完成!")