爬虫:2021广东省普通专升本各院校专业招生计划汇总表

代码如下:
import requests
import json
import pandas as pd
def Get_yxdm_yxmc_list(timeout=10):
    """Fetch the institution list and keep each institution's code and name.

    Sends a body-less POST to the ``getYxxxBm.jsmeb`` endpoint and reads the
    records stored under ``["result"]["result"]`` in the JSON response.

    Args:
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        A list of dicts, one per record, each holding only the keys
        ``'yxdm'`` (institution code) and ``'yxmc'`` (institution name), e.g.
        ``[{'yxdm': '12345', 'yxmc': 'South China Normal University'}]``.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the JSON lacks the expected keys.
    """
    url = "https://www.eeagd.edu.cn/ptzsbks/public/jbxx/getYxxxBm.jsmeb"
    # NOTE(review): the Cookie value is hard-coded; presumably it was copied
    # from a browser session and may stop being accepted -- confirm.
    headers = {
        'Cookie': 'BIGipServerptzsbks_pool=1053294602.17439.0000',
        'Host': 'www.eeagd.edu.cn',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.128 Safari/537.36'
    }
    response = requests.post(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    # Named `payload` so the imported `json` module is not shadowed.
    payload = response.json()
    return [
        {'yxdm': record['yxdm'], 'yxmc': record['yxmc']}
        for record in payload["result"]["result"]
    ]
def Get_zyjh_list(yxdm, name, zyjh_lists, timeout=10):
    """Fetch one institution's enrollment-plan records and collect them.

    Sends a body-less POST to the ``getZyjhByYxdmBm.jsmeb`` endpoint, reads
    the records stored under ``["result"]["result"]`` in the JSON response,
    tags each record with the institution name, and appends it to the
    caller's list.

    Args:
        yxdm: Institution code (a string; it is concatenated onto the URL).
        name: Institution name, stored on every record under ``'yxmc'``.
        zyjh_lists: List owned by the caller; it is mutated in place by
            appending every fetched record.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block forever.

    Returns:
        None. Results are delivered through ``zyjh_lists``.

    Raises:
        requests.RequestException: On connection failure, timeout, or an
            HTTP error status.
        ValueError: If the response body is not valid JSON.
        KeyError: If the JSON lacks the expected keys.
    """
    # NOTE(review): the code is appended as a bare query string with no
    # parameter name (e.g. "?12345"); confirm this is what the endpoint
    # expects.
    url = "https://www.eeagd.edu.cn/ptzsbks/public/jbxx/getZyjhByYxdmBm.jsmeb?"+yxdm
    # NOTE(review): "Mac 05 X" in the User-Agent looks like a typo for
    # "Mac OS X"; it is sent verbatim, so it is left unchanged here.
    headers = {
        'Cookie': 'BIGipServerptzsbks_pool=1053294602.17439.0000',
        'Host': 'www.eeagd.edu.cn',
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac 05 X 10_11_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/52.0.2743.116 Safari/537.36'
    }
    response = requests.post(url, headers=headers, timeout=timeout)
    # Fail loudly on 4xx/5xx instead of trying to parse an error page as JSON.
    response.raise_for_status()
    # Named `payload` so the imported `json` module is not shadowed.
    payload = response.json()
    for record in payload["result"]["result"]:
        # Add the institution name so each row is self-describing once all
        # institutions are merged into one table.
        record['yxmc'] = name
        zyjh_lists.append(record)
def main():
    """Download every institution's enrollment plan into one Excel file.

    Fetches the institution list, requests the plan records of each
    institution in turn, and writes all collected records to sheet
    ``zyjhsheet`` of ``所有专业招生计划表.xlsx``. The path is relative, so the
    file is created in the current working directory.
    """
    zyjh_lists = []
    for school in Get_yxdm_yxmc_list():
        # Get_zyjh_list appends its results to zyjh_lists in place.
        Get_zyjh_list(school['yxdm'], school['yxmc'], zyjh_lists)
    df = pd.DataFrame(zyjh_lists)
    # sheet_name must be passed by keyword: pandas deprecated positional use
    # in 2.1 and accepts only excel_writer positionally from 3.0 on.
    df.to_excel('所有专业招生计划表.xlsx', sheet_name='zyjhsheet')
# Run the scraper only when this file is executed as a script, not on import.
if __name__ == '__main__':
    main()