Python研究生招生网院校、地区等信息

一、开始

  • 方便查找自己需要寻找的地区,院校,专业等
  • 结果生成csv格式,最后转为excel,方便观看
  • 使用到的库:os,requests,lxml,csv,time

二、过程

  • 首先需要进入其网址:https://yz.chsi.com.cn/zsml/queryAction.do
  • 然后可以依次选择自己需要的信息
  • 随后填写参数内容
  • 代码如下
# 选择省市(填序号如:11-65)
ssdm = '11'
# 输入单位名称(直接填文字)
dwmc = ''
# 选择门类(专业学位填zyxw,学术学位填序号如:01-14)
mldm = '01'
# 选择专业领域(填序号如:0801-0872)
yjxkdm = '0101'
# 选择专业(直接填文字如:哲学)
zymc = '哲学'
# 选择学习方式(全日制为1,非全日制为2)
xxfs = 1
  • 填写自己需要保存文件的路径
  • 代码如下
# 保存路径
path_csv = r"C:\Users\14347\Desktop\招生信息"
# 保存名称(默认名称:门类-专业领域-专业.csv)
csv_name = f'{mldm + "-" + yjxkdm + "-" + zymc}.csv'
  • 运行完成后便可以在填写的路径下找到文件

三、保存与转换

切记(第二次运行会覆盖第一次的文件,如需保存请更改文件名)
如需转换为excel格式(依次进行):
  • 数据
  • 从文本/CSV
  • 选择需要转换的文件
  • 加载
  • 另存为
  • 选择保存类型为Excel工作簿
随后便可以根据自己需求选择所要查找的信息

四、源代码

import os.path
import requests
from lxml import etree
import csv
import time


def find_data():
    """Scrape the chsi.com.cn graduate-admissions query pages and write the
    results to a CSV file.

    Reads the module-level query parameters (``ssdm``, ``dwmc``, ``mldm``,
    ``yjxkdm``, ``zymc``, ``xxfs``) and output settings (``path_csv``,
    ``csv_name``).  For every school matching the query it records the
    school name, location, department, research direction, exam subjects
    and enrollment count.

    Side effects only: truncates/overwrites the CSV file and prints
    progress to stdout.  Returns ``None``.
    """
    # Base site, used to resolve the relative links found in result pages.
    title = "https://yz.chsi.com.cn"

    header = {
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:102.0) Gecko/20100101 Firefox/102.0"
    }
    # Search endpoint (queried via POST with URL parameters).
    url = 'https://yz.chsi.com.cn/zsml/queryAction.do'

    count = 0          # current result-list page number
    page = 0           # running count of schools written so far
    choose = True      # True until the real page total has been read
    page_all = 20      # placeholder; replaced by the site's page count below
    time_start = time.time()

    # Truncate any previous output so reruns start from an empty file.
    with open(os.path.join(path_csv, csv_name), 'w', newline=''):
        pass

    # First level: page through the result list.
    while count != page_all:
        count += 1
        params = {
            "ssdm": ssdm,
            "dwmc": dwmc,
            "mldm": mldm,
            "mlmc": "",
            "yjxkdm": yjxkdm,
            "zymc": zymc,
            "xxfs": xxfs,
            "pageno": count,
        }
        response = requests.post(url, headers=header, params=params)
        tree = etree.HTML(response.text)
        url_college_tree = tree.xpath('//*[@id="form3"]/a/@href')
        name_college_tree = tree.xpath('//*[@id="form3"]/a/text()')
        if choose:
            # The last pager link's text holds the total number of pages.
            page_all = tree.xpath('//*[@href="#"]/text()')
            page_all = int(page_all[-1])
            choose = False
        # One row per school on this result page.
        for i in range(len(url_college_tree)):
            page += 1
            with open(os.path.join(path_csv, csv_name), 'a', newline='') as f:
                csv_write = csv.writer(f)
                csv_write.writerow([f'第{page}所学校'])
                csv_write.writerow(['招生单位', '所在地', '院系所', '研究方向', '科目', "招录人数"])
                print(f'----------------------------------第{page}所学校---------------------------------------------')
                # Hoist the school's location: the original repeated this
                # xpath lookup three times per school.
                location = tree.xpath(f'.//table/tbody/tr[{i + 1}]/td[2]/text()')[0]
                csv_write.writerow([name_college_tree[i], location])
                print("招生单位:", name_college_tree[i])
                print("所在地:", location)
                # Second level: the school's department/major listing page.
                url_college = title + url_college_tree[i]
                response_college = requests.get(url_college, headers=header)
                tree_college = etree.HTML(response_college.text)
                search_college_tree_view = tree_college.xpath('.//table/tbody/tr/td[8]/a/@href')
                school_college_tree = tree_college.xpath('.//table/tbody/tr/td[2]/text()')
                direction_college_tree = tree_college.xpath('.//table/tbody/tr/td[4]/text()')
                enrollment_count_tree = tree_college.xpath('.//table/tbody/tr/td[7]/script/text()')
                for j in range(len(search_college_tree_view)):
                    url_subject = title + search_college_tree_view[j]
                    response_subject = requests.get(url_subject, headers=header)
                    tree_subject = etree.HTML(response_subject.text)
                    search_college_tree = tree_subject.xpath('//*[@class="zsml-result"]/table/tbody/tr/td/text()')
                    print("院系所:", school_college_tree[j])
                    print("研究方向:", direction_college_tree[j])
                    # BUG FIX: the original indexed enrollment_count_tree[j]
                    # unconditionally when writing the CSV row, raising
                    # IndexError whenever the page carried no (or too few)
                    # enrollment <script> cells.  Guard the lookup once and
                    # reuse the value for both the print and the CSV row.
                    if j < len(enrollment_count_tree):
                        enrollment = enrollment_count_tree[j].split(":")[1].split("(")[0]
                        print("招录人数:", enrollment)
                    else:
                        enrollment = ''
                    print("科目:", end='')
                    # Third level: exam-subject details for this major.
                    subject_text = ''
                    for k in range(len(search_college_tree)):
                        subject = search_college_tree[k].replace("\r\n", '').replace(' ', '')
                        if subject != '':
                            subject_text += subject
                            subject_text += ','
                            print(subject, end=',')
                    print('\n')
                    csv_write.writerow([name_college_tree[i], location,
                                        school_college_tree[j], direction_college_tree[j], subject_text,
                                        enrollment])
    time_end = time.time()

    print(f"----------------------总耗时:{time_end - time_start}----------------------")


if __name__ == '__main__':
    # ---------------------------------------fill in parameters--------------------------------------------
    # ---------------------------------------fill in parameters--------------------------------------------
    # ---------------------------------------fill in parameters--------------------------------------------
    # Province/city code (e.g. '11'-'65')
    ssdm = '11'
    # Institution name, plain text (empty string = all institutions)
    dwmc = ''
    # Discipline category ('zyxw' for professional degrees, or a code '01'-'14' for academic degrees)
    mldm = '01'
    # First-level discipline code (e.g. '0801'-'0872')
    yjxkdm = '0101'
    # Major name, plain text (e.g. '哲学')
    zymc = '哲学'
    # Study mode: 1 = full-time, 2 = part-time
    xxfs = 1
    # Directory the CSV file is saved into
    path_csv = r"C:\Users\14347\Desktop\招生信息"
    # Output filename (default pattern: category-discipline-major.csv)
    csv_name = f'{mldm + "-" + yjxkdm + "-" + zymc}.csv'
    # ---------------------------------------fill in parameters--------------------------------------------
    # ---------------------------------------fill in parameters--------------------------------------------
    # ---------------------------------------fill in parameters--------------------------------------------

    # Run the scrape with the parameters above.
    find_data()
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值