上海中考分数线爬虫及使用plotly数据可视化
马上就要中考了,蹭一波热度,做了一个上海市近几年中考分数线对比的爬虫:各区学校之间的对比用柱状图,各校历年分数线的变化用折线图。
效果如下:


数据来源:微信小程序 升学查分
数据获取代码
#-----------------引入区-----------------
import requests
import pandas as pd
from urllib.parse import quote
#-----------------常数区-----------------
# Crawl result; getlist() fills it as {area: {volunteer: {year: {school: info}}}}.
# NOTE(review): this name shadows the builtin ``dict``; it is kept because
# other parts of the script refer to it.
dict = {}
# Empty frame, used later only to reach DataFrame.from_dict.
df = pd.DataFrame()
# One HTTP session shared by every request of the crawler.
res = requests.session()
# API token; capture it yourself by sniffing the mini-program's traffic.
token = ''
# Headers sent with every API request.
h = {
    "API-CITY": quote('上海市'),
    "API-TOKEN": token,
    "Accept-Encoding": "gzip,compress,br,deflate",
    "Connection": "keep-alive",
    "Host": "xiaokedou.xkd100.com",
    "Referer": "https://servicewechat.com/wxd588a54f779b2090/43/page-frame.html",
    "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_5_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.6(0x18000632) NetType/WIFI Language/zh_CN",
    "content-type": "application/json",
}
#-----------------函数区-----------------
def getcode(year, vol, area):
    """Fetch the score lines for one year / volunteer type / area.

    Args:
        year: Year to query (callers pass it as a string).
        vol: Volunteer type, as returned by the ``/api/mid/where`` endpoint.
        area: Area name, as returned by the ``/api/mid/where`` endpoint.

    Returns:
        A dict mapping each school name to
        ``{'stype': ..., 'scode': ..., 'sline': ...}``. If the API lists the
        same school name more than once, the last entry wins.

    Raises:
        requests.RequestException: On timeout, connection failure or a
            non-2xx HTTP status.
        KeyError: If the JSON payload lacks the expected keys.
    """
    resp = res.get(
        'https://xiaokedou.xkd100.com/api/mid/search',
        # Let requests build the query string so that non-ASCII or reserved
        # characters in the values are escaped correctly.
        params={'year': year, 'volunteer': vol, 'area': area},
        headers=h,
        # Without a timeout a stalled connection would block the crawl forever.
        timeout=30,
    )
    resp.raise_for_status()
    return {
        item['school_name']: {
            'stype': item['volunteer_type'],
            'scode': item['recruit_code'],
            'sline': item['score_line'],
        }
        for item in resp.json()['data']['list']
    }
def getlist():
    """Crawl every area / volunteer type / year combination.

    Reads the available years, volunteer types and areas from the
    ``/api/mid/where`` endpoint, calls ``getcode`` for each combination and
    stores the result in the module-level ``dict`` as
    ``{area: {volunteer: {year: {school: info}}}}``. The name of each area is
    printed once that area is finished, as a progress indicator.

    Raises:
        requests.RequestException: On timeout, connection failure or a
            non-2xx HTTP status of the ``/where`` request.
        KeyError: If the JSON payload lacks the expected keys.
    """
    resp = res.get(
        'https://xiaokedou.xkd100.com/api/mid/where',
        headers=h,
        # Without a timeout a stalled connection would block the crawl forever.
        timeout=30,
    )
    resp.raise_for_status()
    where = resp.json()['data']['where']
    years = where['years']
    volunteers = where['volunteers']
    areas = where['areas']
    for area in areas:
        per_volunteer = {}
        for vol in volunteers:
            # Keys keep the year exactly as the API returned it; only the
            # query argument is converted to a string.
            per_volunteer[vol] = {
                year: getcode(str(year), vol, area) for year in years
            }
        dict[area] = per_volunteer
        print(area)
if __name__ == '__main__':
    # Fill the module-level ``dict`` with the crawled data.
    getlist()
    # Convert the nested mapping into a DataFrame ...
    pt = pd.DataFrame.from_dict(dict)
    # ... and dump it to a JSON file for the visualisation script.
    pt.to_json('data.json')
数据可视化代码
#-----------------引入区-----------------
import pandas as pd
import plotly
import plotly.graph_objects as go
import plotly.io as po
import os
#-----------------常数区-----------------
# Empty frame; the functions below only use it to call DataFrame.from_dict.
df=pd.DataFrame()
# Crawled data written by the crawler script; its columns are iterated as
# areas by compare() and getline().
x=pd.read_json('data.json')
# NOTE(review): ``pl`` is not referenced anywhere in the visible code.
pl=plotly.plot
#-----------------函数区-----------------
def zhu(year, zhiyuan, area, dd, dic):
    """Draw a bar chart and save it as a 1920x1080 JPG.

    The image is written to ``tmp/<area>/<zhiyuan>/<year>.jpg``; missing
    directories are created. The chart title is ``year + area + zhiyuan``.

    Args:
        year: Year label (str); used in the title and as the file name.
        zhiyuan: Volunteer type (str); used in the title and the path.
        area: Area name (str); used in the title and the path.
        dd: x-axis values (compare() passes school names).
        dic: y-axis values (compare() passes score lines as floats).
    """
    bar = go.Bar(x=dd, y=dic)
    layout = go.Layout(title=year + area + zhiyuan)
    fg = go.Figure(bar, layout)
    path = 'tmp/' + area + '/' + zhiyuan + '/'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(path, exist_ok=True)
    po.write_image(fg, path + year + '.jpg', width=1920, height=1080)
def li(school, zhiyuan, area, dd, dic):
    """Draw a Scatter (line) chart and save it as a 1920x1080 JPG.

    The image is written to ``score/<area>/<zhiyuan>/<school>.jpg``; missing
    directories are created. The chart title is ``school + area + zhiyuan``.

    Args:
        school: School name (str); used in the title and as the file name.
        zhiyuan: Volunteer type (str); used in the title and the path.
        area: Area name (str); used in the title and the path.
        dd: x-axis values (getline() passes year labels).
        dic: y-axis values (getline() passes score lines as floats).
    """
    trace = go.Scatter(x=dd, y=dic)
    layout = go.Layout(title=school + area + zhiyuan)
    fg = go.Figure(trace, layout)
    path = 'score/' + area + '/' + zhiyuan + '/'
    # exist_ok avoids the check-then-create race of a separate exists() test.
    os.makedirs(path, exist_ok=True)
    po.write_image(fg, path + school + '.jpg', width=1920, height=1080)
def compare():
    """Plot, per area / volunteer type / year, the score line of every school.

    Walks the module-level frame ``x`` (columns are areas, rows are volunteer
    types, each cell a ``{year: {school: info}}`` mapping) and calls ``zhu``
    once per (area, volunteer type, year) with the school names and their
    ``'sline'`` values converted to float.

    Raises:
        ValueError, TypeError: If an ``'sline'`` value cannot be converted
            to float.
    """
    for area in x:
        # Iterate the column by label instead of building a transposed frame
        # and reading its single row with ``[0]``: positional ``[]`` access on
        # a label-indexed Series is deprecated in pandas.
        for zhiyuan, ss in x[area].items():
            for year in ss:
                schools = ss[year]
                dd = list(schools)
                dic = [float(info['sline']) for info in schools.values()]
                zhu(year, zhiyuan, area, dd, dic)
def getline(base_year='2020'):
    """Plot, per school, how its score line changed over the years.

    Walks the module-level frame ``x`` (columns are areas, rows are volunteer
    types, each cell a ``{year: {school: info}}`` mapping). For every school
    listed under ``base_year`` it collects the ``'sline'`` value of each year
    as a float and calls ``li`` with the years and scores. Years in which the
    school is absent or its score is not numeric are left out.

    Args:
        base_year: Key of the year whose school list decides which schools
            are plotted. Defaults to ``'2020'``, the value that used to be
            hard-coded.

    Raises:
        KeyError: If ``base_year`` is missing for some area / volunteer type.
    """
    for area in x:
        # Iterate the column by label instead of building a transposed frame
        # and reading its single row with ``[0]``: positional ``[]`` access on
        # a label-indexed Series is deprecated in pandas.
        for zhiyuan, ss in x[area].items():
            for school in ss[base_year]:
                dd = []
                dic = []
                for i in ss:
                    try:
                        score = float(ss[i][school]['sline'])
                    except (KeyError, TypeError, ValueError):
                        # School not listed that year, or no numeric score.
                        continue
                    dd.append(i)
                    dic.append(score)
                li(school, zhiyuan, area, dd, dic)
if __name__ == '__main__':
getline()
compare()
最后成果
链接: https://pan.baidu.com/s/1QXbLiPCaSNByiyJNdVUzXg 密码: vows
最后祝各位考生旗开得胜!
这是老魏的公众号,会发布一些爬虫案例和心得,大家可以一起交流



被折叠的 条评论
为什么被折叠?



