上海中考分数线爬虫及使用plotly数据可视化

上海中考分数线爬虫及使用plotly数据可视化

马上就要中考了,蹭一波热度,做了一个对比上海市近几年中考分数线的爬虫:各区学校之间的对比用柱状图,各校历年分数线的变化用折线图。
效果如下:
在这里插入图片描述
在这里插入图片描述

数据来源:微信小程序 升学查分

数据获取代码

#-----------------引入区-----------------
import requests
import pandas as pd
from urllib.parse import quote

# ----------------- Constants -----------------
# Scraped results, filled in by getlist(): area -> volunteer -> year -> schools.
# NOTE: this name shadows the builtin ``dict``; it is kept because getlist()
# and the __main__ block refer to it by this name.
dict = {}
df = pd.DataFrame()
# One shared HTTP session for every API call.
res = requests.session()
token = ''  # capture your own token from the mini-program's traffic
# Headers passed with every request made by getcode() and getlist().
h = {
    "API-CITY": quote('上海市'),
    "API-TOKEN": token,
    "Accept-Encoding": "gzip,compress,br,deflate",
    "Connection": "keep-alive",
    "Host": "xiaokedou.xkd100.com",
    "Referer": "https://servicewechat.com/wxd588a54f779b2090/43/page-frame.html",
    "User-Agent": "Mozilla/5.0 (iPhone; CPU iPhone OS 14_5_1 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Mobile/15E148 MicroMessenger/8.0.6(0x18000632) NetType/WIFI Language/zh_CN",
    "content-type": "application/json",
}


# ----------------- Functions -----------------
def getcode(year, vol, area):
    """Fetch the score lines for one year / volunteer type / area.

    Args:
        year: Year to query, as a string.
        vol: Volunteer type (getlist() passes the values returned by the
            ``where`` endpoint).
        area: Area name (getlist() passes the values returned by the
            ``where`` endpoint).

    Returns:
        A dict mapping each school name to
        ``{'stype': ..., 'scode': ..., 'sline': ...}``, taken from the
        ``volunteer_type``, ``recruit_code`` and ``score_line`` fields of
        each entry in the response's ``data.list``.
    """
    url = 'https://xiaokedou.xkd100.com/api/mid/search'
    # Let requests build the query string so that non-ASCII area names and
    # reserved characters such as '&' are percent-encoded correctly.
    params = {'year': year, 'volunteer': vol, 'area': area}
    entries = res.get(url, params=params, headers=h).json()['data']['list']
    return {
        entry['school_name']: {
            'stype': entry['volunteer_type'],
            'scode': entry['recruit_code'],
            'sline': entry['score_line'],
        }
        for entry in entries
    }


def getlist():
    """Download every area / volunteer / year combination into ``dict``.

    Asks the ``where`` endpoint for the available years, volunteer types and
    areas, calls getcode() for each combination, and stores the results in
    the module-level ``dict`` as ``dict[area][volunteer][year]``. The area
    name is printed once that area has been fetched completely.
    """
    where_url = 'https://xiaokedou.xkd100.com/api/mid/where'
    options = res.get(where_url, headers=h).json()['data']['where']
    years = options['years']
    volunteers = options['volunteers']
    areas = options['areas']
    for area in areas:
        per_volunteer = {
            vol: {year: getcode(str(year), vol, area) for year in years}
            for vol in volunteers
        }
        dict[area] = per_volunteer
        print(area)
if __name__ == '__main__':
    getlist()
    # Convert the nested mapping filled in by getlist() to a DataFrame.
    pt = df.from_dict(dict)
    # Save it as a JSON file.
    pt.to_json('data.json')

数据可视化代码

#-----------------引入区-----------------
import pandas as pd
import plotly
import plotly.graph_objects as go
import plotly.io as po
import os

# ----------------- Constants -----------------
# Empty DataFrame kept as a module-level handle.
df = pd.DataFrame()
# Score-line data loaded from data.json when the module is imported.
x = pd.read_json('data.json')
# Short alias for plotly.plot.
pl = plotly.plot

# ----------------- Functions -----------------
def zhu(year, zhiyuan, area, dd, dic):
    """Draw a bar chart and save it as a JPEG image.

    Args:
        year: Year label (string); used in the title and as the file name.
        zhiyuan: Volunteer type; used in the title and the output directory.
        area: Area name; used in the title and the output directory.
        dd: x-axis values (compare() passes school names).
        dic: y-axis values (compare() passes score lines), parallel to ``dd``.

    The image is written to ``tmp/<area>/<zhiyuan>/<year>.jpg`` at 1920x1080.
    """
    bars = go.Bar(x=dd, y=dic)
    layout = go.Layout(title=year + area + zhiyuan)
    fg = go.Figure(bars, layout)
    path = 'tmp/' + area + '/' + zhiyuan + '/'
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(path, exist_ok=True)
    po.write_image(fg, path + year + '.jpg', width=1920, height=1080)


def li(school, zhiyuan, area, dd, dic):
    """Draw a line chart and save it as a JPEG image.

    Args:
        school: School name; used in the title and as the file name.
        zhiyuan: Volunteer type; used in the title and the output directory.
        area: Area name; used in the title and the output directory.
        dd: x-axis values (getline() passes years).
        dic: y-axis values (getline() passes score lines), parallel to ``dd``.

    The image is written to ``score/<area>/<zhiyuan>/<school>.jpg`` at
    1920x1080.
    """
    trace = go.Scatter(x=dd, y=dic)
    layout = go.Layout(title=school + area + zhiyuan)
    fg = go.Figure(trace, layout)
    path = 'score/' + area + '/' + zhiyuan + '/'
    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(path, exist_ok=True)
    po.write_image(fg, path + school + '.jpg', width=1920, height=1080)


def _score(ss, year, school):
    """Return ss[year][school]['sline'] as a float, or None if unavailable."""
    try:
        return float(ss[year][school]['sline'])
    except (KeyError, TypeError, ValueError):
        # School absent that year, or the score line is not a number.
        return None


def _by_volunteer(area):
    """Yield (volunteer, {year: {school: info}}) pairs for one column of x."""
    # Iterating the column by label replaces the positional ``[0]`` lookup
    # on a label-indexed Series, which pandas has deprecated.
    for zhiyuan, ss in x[area].items():
        if isinstance(ss, dict):  # skip empty (NaN) cells
            yield zhiyuan, ss


def compare():
    """Compare schools side by side: one bar chart per area, volunteer, year.

    For every area / volunteer type / year found in ``x``, collects each
    school's score line and passes the lists to zhu(). Schools whose score
    line is missing or not numeric are left out of that chart.
    """
    for area in x:
        for zhiyuan, ss in _by_volunteer(area):
            for year in ss:
                dd = []
                dic = []
                for school in ss[year]:
                    score = _score(ss, year, school)
                    if score is None:
                        continue
                    dd.append(school)
                    dic.append(score)
                zhu(year, zhiyuan, area, dd, dic)


def getline(base_year='2020'):
    """Track each school over time: one line chart per area, volunteer, school.

    Args:
        base_year: Year whose school list decides which schools are charted.
            A volunteer type with no data for this year is skipped.

    For each school, collects its score line in every year where one is
    available and numeric, then passes the lists to li().
    """
    for area in x:
        for zhiyuan, ss in _by_volunteer(area):
            for school in ss.get(base_year) or {}:
                dd = []
                dic = []
                for year in ss:
                    score = _score(ss, year, school)
                    if score is None:
                        continue
                    dd.append(year)
                    dic.append(score)
                li(school, zhiyuan, area, dd, dic)


if __name__ == '__main__':
    # Per-school charts first, then the per-year charts.
    getline()
    compare()

最后成果

链接: https://pan.baidu.com/s/1QXbLiPCaSNByiyJNdVUzXg 密码: vows
最后祝各位考生旗开得胜!

这是老魏的公众号,会发布一些爬虫案例和心得,大家可以一起交流
在这里插入图片描述

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值