前言
QS世界大学排名(QS World University Rankings)是由英国一家国际教育市场咨询公司Quacquarelli Symonds(简称QS)所发表的年度世界大学排名。
import requests # 发送请求
import re
import csv
# Open the output CSV in append mode so rows collected by earlier runs are
# kept; newline='' leaves line-ending handling to the csv module.
with open('rank.csv', mode='a', encoding='utf-8', newline='') as f:
    csv_writer = csv.writer(f)
    # In append mode the stream is positioned at the end of the file, so a
    # position of 0 means the file is new or empty. Write the header only
    # then; otherwise every run would insert another header row among the
    # existing data rows.
    if f.tell() == 0:
        csv_writer.writerow(['country', 'rank', 'region', 'score_1', 'score_2', 'score_3', 'score_4', 'score_5', 'score_6', 'total_score', 'stars', 'university', 'year'])
# Matches the nested "td-wrap" / "td-wrap-in" <div> pair and captures the
# text between them. Compiled once so the per-row calls reuse it.
_TD_WRAP_RE = re.compile(
    r'<div class="td-wrap"><div class="td-wrap-in">(.*?)</div></div>'
)


def replace(str_):
    """Return the text wrapped in a ``td-wrap`` / ``td-wrap-in`` div pair.

    Args:
        str_: HTML fragment expected to look like
            ``<div class="td-wrap"><div class="td-wrap-in">VALUE</div></div>``.

    Returns:
        ``VALUE`` from the first wrapper found in ``str_``. If ``str_`` is
        not a string (for example ``None``) or contains no such wrapper, it
        is returned unchanged instead of raising, so one unexpected cell
        does not abort processing of the remaining rows.
    """
    # Non-string cells cannot be searched; pass them through as they are.
    if not isinstance(str_, str):
        return str_
    # search() stops at the first wrapper, which is the only one needed.
    match = _TD_WRAP_RE.search(str_)
    return match.group(1) if match else str_
# Address of the ranking data; the response body is decoded as JSON below.
url = 'https://www.qschina.cn/sites/default/files/qs-rankings-data/cn/2057712_indicators.txt'
# 1. Send the request. The timeout keeps the script from hanging forever if
#    the server stops responding (requests applies no timeout by default).
response = requests.get(url, timeout=30)
# Stop here with a clear HTTP error on a 4xx/5xx reply instead of failing
# later while trying to decode an error page as JSON.
response.raise_for_status()
# 2. Read the data: decode the JSON body into a Python dict.
json_data = response.json()
# 3. Parse the data: the records to iterate over are stored under 'data'.
data_list = json_data['data']
for i in data_list:
country = i['location'] # 国家/地区
rank = i['overall_rank'] # 排名
region = i['region'] # 大洲
score_1 = replace(i['ind_76']) # 学术声誉
score_2 = replace(i['ind_77']) # 雇主声誉
score_3 = replace(i['ind_36']) # 师生比
score_4 = replace(i['ind_73']) # 教员引用率
score_5 = replace(i['ind_18']) # 国际教师
score_6 = replace(i['ind_14']) # 国际学生
total_score = replace(i['overall']) # 总分
stars = i['stars'] # 星级
uni = i['uni'] # 大学名称
university = re.findall('<div class="td-wrap"><div class="td-wrap-in"><a href=".*?" class="uni-link">(.*?)</a></div></div>', uni)[0]
year = "2021" # 年份
print(country, rank, region, score_1, score_2, score_3, sco