工具:Pycharm,Navicat
将前段时间使用爬虫获取的数据进行可视化分析。
于2018/3/18增加职位词云
一.统计数据
import pymysql
db = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='你的密码', db='你的数据库名', charset='utf8')
cursor = db.cursor()
# 教育程度:查询 lagou 这个表中的 education字段的所有字段值,返回的是一个元组
edusql = "SELECT education FROM LAGOU"
# 执行sql语句
cursor.execute(edusql)
# 使用集合来去掉重复的字段值,并放在一个列表中,绘图需要
edulist = [i[0] for i in list(set(cursor.fetchall())) if i[0]]
# 创建列表,用来存放字段值对应的出现次数
educount = []
for each in edulist:
sql1 = "SELECT * FROM lagou where education = '%s'" %each
cursor.execute(sql1)
count1 = cursor.rowcount
educount.append(count1)
# print('%s的次数:%d'%(each,count1))
# print(edulist, educount)
# 工作经验:查询 lagou 这个表中的workYear字段的所有字段值,返回的是一个元组
worksql = "SELECT workYear FROM LAGOU"
cursor.execute(worksql)
worklist = [j[0] for j in list(set(cursor.fetchall())) if j[0]]
workcount = []
for each in worklist:
sql2 = "SELECT * FROM lagou where workYear = '%s'" %each
cursor.execute(sql2)
count2 = cursor.rowcount
workcount.append(count2)
# print(worklist, workcount)
# 地区
dissql = "SELECT district from lagou"
cursor.execute(dissql)
dislist = [i[0] for i in list(set(cursor.fetchall())) if i[0]]
discount = []
for each in dislist:
sql3 = "SELECT * FROM lagou where district = '%s'" % each
cursor.execute(sql3)
count3 = cursor.rowcount
discount.append(count3)
# print(dislist, discount)
# 职位信息
positionsql = "SELECT positionName from lagou"
cursor.execute(positionsql)
positionlist = [i[0].replace(' ', '') for i in list(set(cursor.fetchall())) if i[0]]
positioncount = []
for each in positionlist:
sql4 = "SELECT * FROM lagou where positionName = '%s'" % each
cursor.execute(sql4)
count4 = cursor.rowcount
positioncount.append(count4)
# print(positionlist, positioncount)
db.close()
于2018\3\20修改
将程序中获取数据列表与数据数量列表封装成一个函数,并使用Counter来获取。
from collections import Counter
def getList(filed):
sql = "SELECT %s FROM LAGOU" %filed
# print(sql)
cursor.execute(sql)
gettuple = cursor.fetchall()
L = Counter([each[0] for each in gettuple if each[0]])
dl = dict(L)
resultlist = list(dl.keys())
resultcount = [dl[i] for i in resultlist]
return resultlist, resultcount
将
# 教育程度:查询 lagou 这个表中的 education字段的所有字段值,返回的是一个元组
edusql = "SELECT education FROM LAGOU"
# 执行sql语句
cursor.execute(edusql)
# 使用集合来去掉重复的字段值,并放在一个列表中,绘图需要
edulist = [i[0] for i in list(set(cursor.fetchall())) if i[0]]
# 创建列表,用来存放字段值对应的出现次数
educount = []
for each in edulist:
sql1 = "SELECT * FROM lagou where education = '%s'" %each
cursor.execute(sql1)
count1 = cursor.rowcount
educount.append(count1)
# print('%s的次数:%d'%(each,count1))
# print(edulist, educount)
换成
# 教育程度:查询 lagou 这个表中的 education字段的所有字段值,返回的是一个元组
# 执行sql语句
# 使用集合来去掉重复的字段值,并放在一个列表中,绘图需要 ,创建列表,用来存放字段值对应的出现次数
eduresult = getList('education')
edulist, educount = eduresult # 序列解包
# print(edulist)
# print(educount)
另外三种数据的获取方式也一样进行修改。在下面的制作图表过程中进行同样的修改即可。
打印结果
二.制作图表
在制作图表之前给你们看一个例子:
制作柱状图:
from pyecharts import Bar
bar = Bar("我的第一个图表", "这里是副标题")
bar.add("服装", ["衬衫", "羊毛衫", "雪纺衫", "裤子", "高跟鞋", "袜子"], [5, 20, 36, 10, 75, 90])
bar.show_config()
bar.render()
在Python中运行上面的程序后,会在当前文件下生成一个名为 'render.html' 的文件,使用浏览器打开这个文件就可以看到
你做好的柱状图了。如图所示:
制作成饼图
from pyecharts import Pie
attr = ["衬衫", "羊毛衫", "雪纺衫", "裤子", "高跟鞋", "袜子"]
v1 = [11, 12, 13, 10, 10, 10]
pie = Pie("饼图示例")
pie.add("", attr, v1, is_label_show=True)
pie.show_config()
pie.render(r"lizi.html")
在Python中运行上面的程序后,会在当前文件下生成一个名为 'lizi.html' 的文件,使用浏览器打开这个文件就可以看到
你做好的饼图。如图所示:
熟悉这个例子之后我们就可以进行我们的数据可视化操作了。
现在就将工作经验和教育程度做成柱状图,地区做成饼状图。
在统计数据中的代码的基础上加入绘制图表的代码变成
import pymysql
from pyecharts import Bar
from pyecharts import Pie
from pyecharts import WordCloud
db = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='1likePython', db='TESTDB', charset='utf8')
cursor = db.cursor()
edusql = "SELECT education FROM LAGOU"
cursor.execute(edusql)
edulist = [i[0] for i in list(set(cursor.fetchall())) if i[0]]
educount = []
for each in edulist:
sql1 = "SELECT * FROM lagou where education = '%s'" %each
cursor.execute(sql1)
count1 = cursor.rowcount
educount.append(count1)
# print(edulist, educount)
worksql = "SELECT workYear FROM LAGOU"
cursor.execute(worksql)
worklist = [j[0] for j in list(set(cursor.fetchall())) if j[0]]
workcount = []
for each in worklist:
sql2 = "SELECT * FROM lagou where workYear = '%s'" %each
cursor.execute(sql2)
count2 = cursor.rowcount
workcount.append(count2)
# print(worklist, workcount)
dissql = "SELECT district from lagou"
cursor.execute(dissql)
dislist = [i[0] for i in list(set(cursor.fetchall())) if i[0]]
discount = []
for each in dislist:
sql3 = "SELECT * FROM lagou where district = '%s'" % each
cursor.execute(sql3)
count3 = cursor.rowcount
discount.append(count3)
# print(dislist, discount)
# 职位
positionsql = "SELECT positionName from lagou"
cursor.execute(positionsql)
positionlist = [i[0].replace(' ', '') for i in list(set(cursor.fetchall())) if i[0]]
positioncount = []
for each in positionlist:
sql4 = "SELECT * FROM lagou where positionName = '%s'" % each
cursor.execute(sql4)
count4 = cursor.rowcount
positioncount.append(count4)
# print(positionlist, positioncount)
name = '拉钩图表'
bar1 = Bar(name, '第一张')
bar1.add('学历要求', edulist, educount)
bar1.show_config()
bar1.render(r'edu.html')
bar2 = Bar(name, '第二张')
bar2.add('工作经验', worklist, workcount)
bar2.show_config()
bar2.render(r'work.html')
pie = Pie(name, '饼图示例')
pie.add("", dislist, discount, is_label_show=True)
pie.show_config()
pie.render(r"district.html")
wordcloud = WordCloud(width=1300, height=620)
wordcloud.add("", positionlist, positioncount, word_size_range=[20, 100])
wordcloud.show_config()
wordcloud.render(r"positionName.html")
db.close()
执行程序后打开对应的 html 文件就可以看到图表了。