抓取中国最好大学排名-社会声誉排名2018（requests, bs4, csv）

最新推荐文章于 2022-11-30 19:52:27 发布

Hello_Jandy

最新推荐文章于 2022-11-30 19:52:27 发布

阅读量842

点赞数

分类专栏：爬虫

本文链接：https://blog.csdn.net/weixin_42141853/article/details/80951379

版权

import csv

import requests
from bs4 import BeautifulSoup

# Request headers sent with every HTTP request made by this script.
# The User-Agent imitates a desktop Opera 11.11 browser; the tokens must be
# separated by spaces to form a well-formed User-Agent value.
headers = {
    'User-Agent': "Opera/9.80 (Windows NT 6.1; U; en) Presto/2.8.131 Version/11.11",
}

def getInfo(url):
    #获取页面代码
    html=requests.get(url,headers=headers).content.decode('utf-8','ignore')

    soup=BeautifulSoup(html,'lxml')

    #获取表头
    theadList=[]
    thead=soup.select('thead th')
    for head in thead:
        theadList.append(head.text)
    print(theadList)
    with open('ranking.csv', 'w') as f:
        writer = csv.writer(f)
        writer.writerow(theadList)


    #获取学校列表
    schoolList=soup.select('tr.alt')
    for school in schoolList:
        #排名
        ranking=school.select('td:nth-of-type(1)')[0].text
        #学校名称
        schoolName=school.select('td:nth-of-type(2)')[0].text
        #省市
        schoolAddress=school.select('td:nth-of-type