Python3之拉钩数据可视化

18 篇文章 0 订阅
8 篇文章 0 订阅

工具:Pycharm,Navicat

将前段时间使用爬虫获取的数据进行可视化分析。


于2018/3/18增加职位词云

一.统计数据

import pymysql

db = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='你的密码', db='你的数据库名', charset='utf8')

cursor = db.cursor()
# 教育程度:查询 lagou 这个表中的 education字段的所有字段值,返回的是一个元组
edusql = "SELECT education FROM LAGOU"

# 执行sql语句
cursor.execute(edusql)

# 使用集合来去掉重复的字段值,并放在一个列表中,绘图需要
edulist = [i[0] for i in list(set(cursor.fetchall())) if i[0]]

# 创建列表,用来存放字段值对应的出现次数
educount = []
for each in edulist:
    sql1 = "SELECT * FROM lagou where education = '%s'" %each
    cursor.execute(sql1)
    count1 = cursor.rowcount
    educount.append(count1)
    # print('%s的次数:%d'%(each,count1))
# print(edulist, educount)

# 工作经验:查询 lagou 这个表中的workYear字段的所有字段值,返回的是一个元组
worksql = "SELECT workYear FROM LAGOU"
cursor.execute(worksql)
worklist = [j[0] for j in list(set(cursor.fetchall())) if j[0]]
workcount = []
for each in worklist:
    sql2 = "SELECT * FROM lagou where workYear = '%s'" %each
    cursor.execute(sql2)
    count2 = cursor.rowcount
    workcount.append(count2)
# print(worklist, workcount)

# 地区
dissql = "SELECT district from lagou"
cursor.execute(dissql)
dislist = [i[0] for i in list(set(cursor.fetchall())) if i[0]]
discount = []
for each in dislist:
    sql3 = "SELECT * FROM lagou where district = '%s'" % each
    cursor.execute(sql3)
    count3 = cursor.rowcount
    discount.append(count3)
# print(dislist, discount)

# 职位信息
positionsql = "SELECT positionName from lagou"
cursor.execute(positionsql)
positionlist = [i[0].replace(' ', '') for i in list(set(cursor.fetchall())) if i[0]]
positioncount = []
for each in positionlist:
        sql4 = "SELECT * FROM lagou where positionName = '%s'" % each
        cursor.execute(sql4)
        count4 = cursor.rowcount
        positioncount.append(count4)
# print(positionlist, positioncount)

db.close()


于2018\3\20修改

将程序中获取数据列表与数据数量列表封装成一个函数,并使用Counter来获取。

from collections import Counter

def getList(filed):
    sql = "SELECT %s FROM LAGOU" %filed
    # print(sql)
    cursor.execute(sql)
    gettuple = cursor.fetchall()
    L = Counter([each[0] for each in gettuple if each[0]])
    dl = dict(L)
    resultlist = list(dl.keys())
    resultcount = [dl[i] for i in resultlist]

    return resultlist, resultcount

# 教育程度:查询 lagou 这个表中的 education字段的所有字段值,返回的是一个元组
edusql = "SELECT education FROM LAGOU"

# 执行sql语句
cursor.execute(edusql)

# 使用集合来去掉重复的字段值,并放在一个列表中,绘图需要
edulist = [i[0] for i in list(set(cursor.fetchall())) if i[0]]

# 创建列表,用来存放字段值对应的出现次数
educount = []
for each in edulist:
    sql1 = "SELECT * FROM lagou where education = '%s'" %each
    cursor.execute(sql1)
    count1 = cursor.rowcount
    educount.append(count1)
    # print('%s的次数:%d'%(each,count1))
# print(edulist, educount)

换成

# 教育程度:查询 lagou 这个表中的 education字段的所有字段值,返回的是一个元组
# 执行sql语句
# 使用集合来去掉重复的字段值,并放在一个列表中,绘图需要  ,创建列表,用来存放字段值对应的出现次数
eduresult = getList('education')
edulist, educount = eduresult       # 序列解包
# print(edulist)
# print(educount)

另外三种数据的获取方式也一样进行修改。在下面的制作图表过程中进行同样的修改即可。

打印结果



二.制作图表

在制作图表之前给你们看一个例子:

制作柱状图:

from pyecharts import Bar

bar = Bar("我的第一个图表", "这里是副标题")
bar.add("服装", ["衬衫", "羊毛衫", "雪纺衫", "裤子", "高跟鞋", "袜子"], [5, 20, 36, 10, 75, 90])
bar.show_config()
bar.render()

在Python中运行上面的程序后,会在当前文件下生成一个名为 'render.html' 的文件,使用浏览器打开这个文件就可以看到

你做好的柱状图了。如图所示:

制作成饼图

from pyecharts import Pie

attr = ["衬衫", "羊毛衫", "雪纺衫", "裤子", "高跟鞋", "袜子"]
v1 = [11, 12, 13, 10, 10, 10]
pie = Pie("饼图示例")
pie.add("", attr, v1, is_label_show=True)
pie.show_config()
pie.render(r"lizi.html")

在Python中运行上面的程序后,会在当前文件下生成一个名为 'lizi.html' 的文件,使用浏览器打开这个文件就可以看到

你做好的饼图。如图所示:



熟悉这个例子之后我们就可以进行我们的数据可视化操作了。

现在就将工作经验和教育程度做成柱状图,地区做成饼状图。

在统计数据中的代码的基础上加入绘制图表的代码变成

import pymysql
from pyecharts import Bar
from pyecharts import Pie
from pyecharts import WordCloud

db = pymysql.connect(host='127.0.0.1', port=3306, user='root', passwd='1likePython', db='TESTDB', charset='utf8')

cursor = db.cursor()
edusql = "SELECT education FROM LAGOU"
cursor.execute(edusql)
edulist = [i[0] for i in list(set(cursor.fetchall())) if i[0]]
educount = []
for each in edulist:
    sql1 = "SELECT * FROM lagou where education = '%s'" %each
    cursor.execute(sql1)
    count1 = cursor.rowcount
    educount.append(count1)
# print(edulist, educount)

worksql = "SELECT workYear FROM LAGOU"
cursor.execute(worksql)
worklist = [j[0] for j in list(set(cursor.fetchall())) if j[0]]
workcount = []
for each in worklist:
    sql2 = "SELECT * FROM lagou where workYear = '%s'" %each
    cursor.execute(sql2)
    count2 = cursor.rowcount
    workcount.append(count2)
# print(worklist, workcount)

dissql = "SELECT district from lagou"
cursor.execute(dissql)
dislist = [i[0] for i in list(set(cursor.fetchall())) if i[0]]
discount = []
for each in dislist:
    sql3 = "SELECT * FROM lagou where district = '%s'" % each
    cursor.execute(sql3)
    count3 = cursor.rowcount
    discount.append(count3)
# print(dislist, discount)

# 职位
positionsql = "SELECT positionName from lagou"
cursor.execute(positionsql)
positionlist = [i[0].replace(' ', '') for i in list(set(cursor.fetchall())) if i[0]]
positioncount = []
for each in positionlist:
        sql4 = "SELECT * FROM lagou where positionName = '%s'" % each
        cursor.execute(sql4)
        count4 = cursor.rowcount
        positioncount.append(count4)
# print(positionlist, positioncount)

name = '拉钩图表'
bar1 = Bar(name, '第一张')
bar1.add('学历要求', edulist, educount)
bar1.show_config()
bar1.render(r'edu.html')

bar2 = Bar(name, '第二张')
bar2.add('工作经验', worklist, workcount)
bar2.show_config()
bar2.render(r'work.html')

pie = Pie(name, '饼图示例')
pie.add("", dislist, discount, is_label_show=True)
pie.show_config()
pie.render(r"district.html")


wordcloud = WordCloud(width=1300, height=620)
wordcloud.add("", positionlist, positioncount, word_size_range=[20, 100])
wordcloud.show_config()
wordcloud.render(r"positionName.html")

db.close()

执行程序后打开对应的 html 文件就可以看到图表了。


三.展示结果









评论 5
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值