前言
软科中国大学排名以专业、客观、透明的优势赢得了高等教育领域和社会的广泛关注和认可。本文将利用 Python 对我国大学的排名和分布情况进行一番研究。
先展示一下爬虫的源码:
import requests
import parsel
import csv
# Scrape setup: download the ranking page, parse it into table rows, and
# prepare the CSV writer used by the row loop that follows.
url = 'http://m.gaosan.com/gaokao/265440.html'
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/81.0.4044.138 Safari/537.36',
}

# requests applies no timeout by default; without one a stalled server would
# block the script forever.
response = requests.get(url=url, headers=headers, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page as data.
response.raise_for_status()
# Decode the body with the encoding detected from its content.
response.encoding = response.apparent_encoding
selector = parsel.Selector(response.text)
trs = selector.css('#page tr')

# The output file is opened only after the download succeeded, so a failed
# request neither creates an empty file nor leaves an open handle behind.
# NOTE(review): no close() is visible in this part of the script; make sure
# `f` is closed once all rows have been written.
f = open('排名.csv', mode='a', encoding='utf-8', newline='')
csv_writer = csv.DictWriter(
    f,
    fieldnames=['名次', '学校名称', '综合得分', '星级排名', '办学层次'],
)
for tr in trs:
dit = {
}
ranking = tr.css('td:nth-child(1)::text').get()
dit['名次'] = ranking
school = tr.css('td:nth-child(2)::text').get()
dit['学校名称'] = school
score = tr.css('td:nth-child(3)::text').get()
dit['综合得分'] = score
star = tr.css('td:nth-child(4)::text').get()
dit['星级排名'] = star
level = tr.css('td:nth-child(5)::text&