目录
效果图
开发配置
浏览器:谷歌、Edge
编程工具:PyCharm、Navicat(MySql)
编程语言:Python、SQL
可视化模型生成技术:pyecharts
数据来源:国家统计局 招生、在校学生、毕业生数
数据获取:爬虫
实现流程
数据库设计
列名 | 数据类型 | 长度 | 非空 | 主键 | 说明 |
id | int | 0 | √ | √ | ID标识 |
year | int | 0 | 年份 | ||
name | varchar | 255 | 指标 | ||
recruit_students | varchar | 255 | 招生人数 | ||
In_school_students | varchar | 255 | 在校人数 | ||
graduate | varchar | 255 | 毕业人数 |
数据库连接
创建一个py文件(MySql.py)
import pymysql
# 打开数据库连接
db = pymysql.connect(
database = 'data_visualization' ,
host = 'localhost' ,
user = 'root' ,
password = '123456' ,
)
# 创建游标
cursor = db.cursor()
# 判断数据库是否连接
def SqlDB():
try:
if db.open() == 1:
return 'db.open() == 1:数据库已连接'
else:
return 'db.open() == 0:数据库未连接'
except:
print(pymysql.Error.args[0] , pymysql.Error.args[1])
return '连接错误'
# 数据库 -》 插入数据
def SqlInsert( sql , listData ):
try:
# 执行SQL语句
cursor.execute(sql , listData)
if cursor.rowcount > 0:
print('数据插入成功')
else:
print('数据插入失败')
# 数据持久化保存
db.commit()
except:
print(SqlDB())
# 发生错误时回滚
db.rollback()
# 数据库 -》 查询数据
def SqlSelect( sql ):
try:
# 执行SQL语句
cursor.execute(sql)
data = cursor.fetchall()
if cursor.fetchall() is None:
return '搜索为空'
# 返回查询结果所有数据
return data
except:
print(SqlDB())
# 关闭数据库连接
def dbClose():
db.close()
print('数据库成功关闭')
if __name__ == '__main__':
print()
数据获取
# coding=utf-8
from lxml import html
import requests
import time
import MySql
def student_number():
url = 'https://data.stats.gov.cn/tablequery.htm?code=AD1O'
headers = {
'User-Agent':'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36 Edg/122.0.0.0' } # 浏览器代理
key = { } # 参数键值对
key['m'] = 'QueryData'
key['code'] = 'AD1O'
for year in range(2023 , 2013 , -1):
key['wds'] = '[{"wdcode":"reg","valuecode":"000000"},{"wdcode":"sj","valuecode":"' + str(year) + '"}]'
# 获取网页源代码
r = requests.get(url , headers = headers , params = key , verify = False)
etree = html.etree
Html = etree.HTML(r.text) # 将HTML文本加载到etree对象中
xpath0 = Html.xpath('//td') # 标签定位
# 确定数据结构
listData1 = []
listData2 = []
listData2.append(year)
for i in range(1 , len(xpath0) + 1):
listData2.append(xpath0[i - 1].text)
if i % 4 == 0:
listData1.append(listData2)
listData2 = []
listData2.append(year)
print(listData1)
# 控制爬取速度
time.sleep(2)
# 数据插入数据库
sql = 'INSERT INTO `data_visualization`.`students_number` (`year`, `name`, `recruit_students`, `In_school_students`, `graduate`) ' \
'VALUES (%s, %s, %s, %s, %s);'
for i in range(len(listData1)):
MySql.SqlInsert(sql , listData1[i])
if __name__ == '__main__':
# 招生、在校学生、毕业生数 数据
student_number()
效果图
获取后在Navicat中对数据进一步整理格式
SELECT NAME FROM students_number WHERE `name` LIKE '% #%'
UPDATE students_number SET `name`='专科' WHERE `name` LIKE '% #专科%'
UPDATE students_number SET `name`='职业初中' WHERE `name` LIKE '% #职业初中%'
pycharts
官方链接:Timeline - Timeline_bar - Document (pyecharts.org)
将官方的源代码复制一下然后将里面的数据替换,再增加自己想要的图表配置。
from pyecharts import options as opts
from pyecharts.charts import Bar , Timeline
import MySql
# 将元组二维数据换为一维
def Data_structure_transformation( data ):
data = list(data)
for i in range(0 , len(data)):
data[i] = data[i][0]
return data
def dataHtml():
sql = 'SELECT DISTINCT `name` FROM students_number'
nameList = Data_structure_transformation(MySql.SqlSelect(sql))
# 图表类型 -》 时间轴柱图
tl = Timeline(init_opts = opts.InitOpts(width = '1200px' , height = '600px' , page_title = '数据可视化'))
for i in range(2014 , 2024):
sql1 = 'SELECT `recruit_students` FROM students_number WHERE YEAR = ' + str(i)
sql2 = 'SELECT `In_school_students` FROM students_number WHERE YEAR = ' + str(i)
sql3 = 'SELECT `graduate` FROM students_number WHERE YEAR = ' + str(i)
SelectNumber1 = Data_structure_transformation(MySql.SqlSelect(sql1))
SelectNumber2 = Data_structure_transformation(MySql.SqlSelect(sql2))
SelectNumber3 = Data_structure_transformation(MySql.SqlSelect(sql3))
bar = (
Bar(init_opts = opts.InitOpts(width = '1200px' , height = '600px' , page_title = '数据可视化'))
.add_xaxis(nameList)
.add_yaxis('招生数(人)' , SelectNumber1)
.add_yaxis('在校学生数(人)' , SelectNumber2 ,
markpoint_opts = opts.MarkPointOpts(
data = [opts.MarkPointItem(type_ = "max" , name = "最大值")] , symbol = 'pin',label_opts=opts.LabelOpts(color='red')))
.add_yaxis('毕业生数(人)' , SelectNumber3)
.set_global_opts(
title_opts = opts.TitleOpts(title = "招生、在校学生、毕业生数" , subtitle = "数据来源:国家统计局(https://data.stats.gov.cn/tablequery.htm?code=AD10)"))
.set_series_opts(label_opts = opts.LabelOpts(is_show = False)
)
)
tl.add(bar , "{}年".format(i))
tl.render("students_number.html")
if __name__ == '__main__':
dataHtml()
效果图
运行后再文件夹下会生成一个 students_number.html 文件,双击打开,在右侧有可选择用哪个浏览器打开