import requests
import csv
import time
from pyecharts import options as opts
from pyecharts.charts import Map
class DaXue():
    """Scrape the university popularity ranking from api.eol.cn into a CSV file.

    Each instance opens the output CSV in append mode, so rows accumulate
    across runs. The header row is written only when the file is still empty.
    Call :meth:`run` to crawl the listing pages; the CSV file is closed when
    ``run`` finishes (successfully or not), or explicitly via :meth:`close`.
    """

    # Output file shared with the visualisation step.
    _CSV_PATH = '大学热度排行utf-8.csv'

    # Header row; column order must match the rows built in ``run``.
    _CSV_HEADER = ['排行', '大学名称', '公/私办', '所在省份', '所在城市', '具体地址',
                   '办学层次', '办学性质', '总查询热度', '本年查询热度']

    # JSON keys copied from every school record, in CSV column order
    # (name, public/private, province, city, address, level, type,
    # total query heat, this year's query heat).
    _FIELDS = ('name', 'nature_name', 'province_name', 'city_name', 'address',
               'level_name', 'type_name', 'view_total', 'view_year')

    # Listing endpoint; ``{page}`` is the 1-based page number (15 schools per
    # page, sorted by total views, descending).
    _URL_TEMPLATE = (
        'https://api.eol.cn/gkcx/api/?access_token=&admissions=&central='
        '&department=&dual_class=&f211=&f985=&is_doublehigh=&keyword=&nature='
        '&page={page}&province_id=&school_type=6000&signsafe=&size=15'
        '&sort=view_total&sorttype=desc&type='
        '&uri=apidata/api/gk/school/lists'
    )

    def __init__(self):
        """Set up the row counter, request headers and the CSV writer."""
        # Running rank of the next university to be written (1-based).
        self.num = 1
        # Browser-like User-Agent sent with every request.
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/94.0.4606.71 Safari/537.36"
        }
        # utf-8-sig writes a BOM at the start of a new file.
        self.file = open(self._CSV_PATH, 'a', encoding='utf-8-sig', newline='')
        self.write_csv = csv.writer(self.file)
        # In append mode the stream starts at the end of the file, so a
        # position of 0 means the file is empty and still needs its header.
        if self.file.tell() == 0:
            self.write_csv.writerow(self._CSV_HEADER)

    def save_file(self, file_data):
        """Append one row (a sequence of cell values) to the CSV file."""
        self.write_csv.writerow(file_data)

    def close(self):
        """Flush and close the CSV file; safe to call more than once."""
        self.file.close()

    def run(self, start_page=1, end_page=78):
        """Crawl listing pages ``start_page``..``end_page`` (inclusive).

        Every school on every page is written to the CSV as one row, prefixed
        with a running rank. The CSV file is closed when this method returns
        or raises.

        Raises:
            requests.RequestException: on network failure, timeout, or a
                non-success HTTP status.
            KeyError: if the response JSON lacks an expected key.
        """
        try:
            for page in range(start_page, end_page + 1):
                print(f'正在爬取第{page}页内容')
                url = self._URL_TEMPLATE.format(page=page)
                # A timeout keeps a stalled connection from hanging the crawl.
                response = requests.get(url, headers=self.headers, timeout=10)
                response.raise_for_status()
                # The schools of one page live under data -> item.
                school_list = response.json()['data']['item']
                for school in school_list:
                    row = [self.num]
                    row.extend(school[key] for key in self._FIELDS)
                    self.save_file(row)
                    print(f'第{self.num}条数据写入成功!')
                    print('-' * 100)
                    self.num += 1
                # Pause 0.5 s between pages to avoid tripping anti-scraping limits.
                time.sleep(0.5)
        finally:
            self.close()
        print('数据保存完毕!')
# Visualise the saved data
class Vasualable():
    """Aggregate the saved CSV by province and render it as a map of China."""

    def count_data(self):
        """Sum this year's query heat per province from the saved CSV.

        Only rows whose first cell is numeric (the rank) are counted, which
        skips header rows. Blank or truncated rows are ignored.

        Returns:
            tuple[list[str], list[int]]: ``(province, total)`` where
            ``total[i]`` is the summed last-column value for ``province[i]``.
        """
        total_dict = {}
        # utf-8-sig matches how the file is written and strips the BOM;
        # newline='' is what the csv module expects for its input files.
        with open('大学热度排行utf-8.csv', 'r', encoding='utf-8-sig', newline='') as csv_file:
            for row in csv.reader(csv_file):
                # Need at least the province column (index 3); a non-numeric
                # first cell marks a header row.
                if len(row) < 4 or not row[0].isnumeric():
                    continue
                # Column 3 is the province, the last column this year's heat.
                total_dict[row[3]] = total_dict.get(row[3], 0) + int(row[-1])
        # Keys and values are split into two parallel, index-aligned lists.
        province = list(total_dict)
        total = list(total_dict.values())
        return province, total

    def run(self):
        """Build the per-province heat map and write it to ``test.html``."""
        print('正在生成可视化地图....')
        province, total = self.count_data()
        c = (
            Map()
            .add("", [list(z) for z in zip(province, total)], "china")
            .set_global_opts(
                title_opts=opts.TitleOpts(title="本年度各省高校查询热度"),
                visualmap_opts=opts.VisualMapOpts(max_=50000000)
            )
        )
        c.render('test.html')
        print('地图生成完毕!')
if __name__ == '__main__':
    # Stage 1 (disabled): crawl the ranking and save it to the CSV file.
    # Uncomment the next line to refresh the data before drawing the map.
    # DaXue().run()
    # Stage 2: build the visualisation map from the saved CSV data.
    visualizer = Vasualable()
    visualizer.run()
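# 爬取大学查询热度并生成可视化地图
# (Scrape university query popularity and generate a visualisation map.)
# 最新推荐文章于 2024-11-10 13:43:20 发布
# (Web-page residue: "latest recommended article published 2024-11-10 13:43:20".)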