# 教师信息爬取，并导出csv文件_大学教师信息爬取-CSDN博客
#
# 本文链接：https://blog.csdn.net/2301_79271343/article/details/141955310
import requests
from bs4 import BeautifulSoup
import csv

# Per-rank result lists; get_news() appends each scraped teacher to one of them.
professors, associate_professors, lecturers = [], [], []
def get_news(url):
    """Scrape the staff page at *url* and return the teachers found on it.

    The page is fetched with ``requests`` and parsed with BeautifulSoup. Every
    ``<a>`` inside ``.part1``, ``.part2`` and ``.part3`` is read as one teacher
    card: the ``<h3>`` gives the name, the first ``<p>`` the title, and the
    optional second ``<p>`` the research direction ('N/A' when absent).

    Besides being returned, each record is appended to a module-level list
    chosen by section: ``.part1`` -> ``professors``, ``.part2`` ->
    ``associate_professors``, ``.part3`` -> ``lecturers``. Those lists are
    never cleared here, so calling the function again adds to them.

    Args:
        url: Address of the page to scrape.

    Returns:
        A list of dicts with the keys ``name``, ``position`` and
        ``research_direction``, in the order they were read. The list is
        empty when the request fails or the status code is not 200.

    No exception propagates to the caller: any error is printed and the
    records collected up to that point are returned.
    """
    teachers = []
    # Index i - 1 selects the list that belongs to section `.part{i}`.
    rank_lists = (professors, associate_professors, lecturers)
    try:
        # A timeout keeps an unresponsive server from hanging the script.
        response = requests.get(url, timeout=10)
        print("Response status code:", response.status_code)

        if response.status_code != 200:
            print('请求失败，状态码：', response.status_code)
            return teachers

        soup = BeautifulSoup(response.content, 'html.parser')

        for i in range(1, 4):
            for news in soup.select(f'.part{i} a'):
                name_tag = news.find('h3')
                paragraphs = news.find_all('p')
                if name_tag is None or not paragraphs:
                    # A link without the name/title markup is not a teacher
                    # card; skipping it keeps one odd link from aborting the
                    # whole scrape.
                    continue

                name = name_tag.get_text().strip().replace('名字：', '')
                position = paragraphs[0].get_text().strip().replace('职称：', '')
                if len(paragraphs) > 1:
                    research_direction = (
                        paragraphs[1].get_text().strip().replace('研究方向：', '')
                    )
                else:
                    # Must be assigned explicitly, otherwise the name is
                    # unbound (or still holds the previous teacher's value).
                    research_direction = 'N/A'

                new_info = {
                    'name': name,
                    'position': position,
                    'research_direction': research_direction,
                }
                print(f'名字: {name}')
                print(f'{position}')
                print(f' {research_direction}')
                print('----------------------------------')

                rank_lists[i - 1].append(new_info)
                teachers.append(new_info)
    except Exception as e:
        # Deliberate best-effort boundary: report the problem and hand back
        # whatever was collected instead of crashing the calling script.
        print('请求或解析过程中发生错误:', e)

    return teachers


# Export the scraped staff list to CSV.
# The page is fetched and the category is asked for BEFORE the file is opened,
# so an invalid menu choice (or an interrupt at the prompt) no longer truncates
# an existing CSV.
members = get_news('https://ss.nuc.edu.cn/szdw/jgml.htm')

# Let the user pick which rank to export.
print("导出\n1.教授\n2.副教授\n3.讲师\n4.全部")
choice = input("选择你要导出的讲师还是教授：")

# Menu option -> rows to write. The per-rank lists were filled by get_news().
export_groups = {
    '1': professors,
    '2': associate_professors,
    '3': lecturers,
    '4': members,
}
selected = export_groups.get(choice.strip())

if selected is None:
    # Previously an unknown option silently produced a header-only file.
    print("无效的选择，未导出任何数据")
else:
    # NOTE(review): "teatchers" looks like a typo for "teachers"; the name is
    # kept so anything that already reads this file keeps working.
    with open("teatchers.csv", "w", newline='', encoding='utf-8') as csvfile:
        writer = csv.writer(csvfile)
        writer.writerow(["姓名", "职称", "研究方向"])
        writer.writerows(
            [member['name'], member['position'], member['research_direction']]
            for member in selected
        )
    # Report success only after the file has been flushed and closed.
    print("导出成功")