教师信息爬取,并导出csv文件

import requests
from bs4 import BeautifulSoup
import csv

# Per-rank buckets; get_news() appends each scraped teacher to one of them.
professors, associate_professors, lecturers = [], [], []
def _parse_teacher(entry):
    """Build one teacher record from a listing anchor, or None if it is malformed.

    An anchor is considered malformed when it has no ``<h3>`` (name) or no
    ``<p>`` (title); such anchors are skipped by the caller.
    """
    heading = entry.find('h3')
    paragraphs = entry.find_all('p')
    if heading is None or not paragraphs:
        return None

    name = heading.get_text().strip().replace('名字:', '')
    position = paragraphs[0].get_text().strip().replace('职称:', '')
    if len(paragraphs) > 1:
        research_direction = paragraphs[1].get_text().strip().replace('研究方向:', '')
    else:
        # No second paragraph: record an explicit placeholder instead of
        # leaving the value unset (or carrying over the previous teacher's).
        research_direction = 'N/A'

    return {
        'name': name,
        'position': position,
        'research_direction': research_direction,
    }


def get_news(url):
    """Fetch the staff page at ``url`` and return the teachers listed on it.

    Anchors are read from the CSS sections ``.part1``, ``.part2`` and
    ``.part3``. Besides being returned, each teacher is appended to the
    module-level ``professors``, ``associate_professors`` or ``lecturers``
    list, matching the section it was found in.

    Args:
        url: Address of the page to scrape.

    Returns:
        A list of dicts with the keys ``name``, ``position`` and
        ``research_direction``. The list is empty or partial when the
        request or the parsing fails; failures are printed, not raised.
    """
    teachers = []
    try:
        # A timeout keeps the script from hanging forever on a stalled server.
        response = requests.get(url, timeout=10)
        print("Response status code:", response.status_code)

        if response.status_code != 200:
            print('请求失败,状态码:', response.status_code)
            return teachers

        soup = BeautifulSoup(response.content, 'html.parser')

        # Section N of the page (".partN") feeds the N-th bucket.
        buckets = (professors, associate_professors, lecturers)
        for section, bucket in enumerate(buckets, start=1):
            for entry in soup.select(f'.part{section} a'):
                info = _parse_teacher(entry)
                if info is None:
                    # Skip anchors that are not teacher cards rather than
                    # aborting the whole scrape.
                    continue
                print(f"名字: {info['name']}")
                print(f"{info['position']}")
                print(f" {info['research_direction']}")
                print('----------------------------------')
                bucket.append(info)
                teachers.append(info)
    except Exception as e:
        # Best-effort scraper: report the problem and return what was collected.
        print('请求或解析过程中发生错误:', e)

    return teachers


# Export the scraped teachers to a CSV file.
# Scrape and ask for the choice first, so the output file is only opened
# (and truncated) once everything needed to fill it is available.
members = get_news('https://ss.nuc.edu.cn/szdw/jgml.htm')

# Ask which group of teachers to export.
print("导出\n1.教授\n2.副教授\n3.讲师\n4.全部")
choice = input("选择你要导出的讲师还是教授:").strip()

# Menu choice -> list of teacher records to write.
export_groups = {
    '1': professors,
    '2': associate_professors,
    '3': lecturers,
    '4': members,
}

with open("teatchers.csv", "w", newline='', encoding='utf-8') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerow(["姓名", "职称", "研究方向"])
    selected = export_groups.get(choice)
    if selected is None:
        # Unknown choice: only the header is written; say so instead of
        # exiting silently.
        print("无效的选择,未导出任何教师数据")
    else:
        writer.writerows(
            [member['name'], member['position'], member['research_direction']]
            for member in selected
        )
        print("导出成功")

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值