数据可视化

最新推荐文章于 2024-10-14 23:40:45 发布

lss_cat

最新推荐文章于 2024-10-14 23:40:45 发布

阅读量4k

点赞数 6

分类专栏：无　文章标签：python list

本文链接：https://blog.csdn.net/let_s_goon/article/details/121858884

版权

无专栏收录该内容

1 篇文章 0 订阅

订阅专栏

本文通过Python的xlrd库解析Excel数据，进行数据分析，并利用pyecharts库创建可视化图表，展示了对前程无忧招聘网站上不同城市、语言和薪资范围的程序员岗位分布情况。分析了C++、Java、Python三种语言的薪资分布，并绘制了柱状图。同时，还探讨了学历分布情况和程序员工作城市的数据分布。提供了代码示例供读者参考，以分析其他编程语言的数据。

摘要由CSDN通过智能技术生成

前言：相信来看可视化的兄弟多多少少都有一点python的基础，那么就开始进入我们今天的正题了。

主要内容：

1.用python里面的xlrd库获取我们爬下来的数据，不知道这个库也没事，我直接反手一个链接

2.相信都去看了一手这个库了，然后就是根据获取到的数据来分析了

3.再用一下python中的pyecharts这个库，说实话，这是真的好用

4.下面来康康我们的数据分析，哦不对，先说说我队友爬的是什么，我们爬的是前程无忧招聘网上的信息，先放一张照片给你们康康大概有哪些，等一下丢一个队友的链接在后面，想了解的可以去看看

代码展示：

import xlrd  # used to read the Excel data
# Workbook with the scraped 51job listings for C++ (the cities are listed in the file name).
file_location = "51job工作列表-北京,上海,广州,深圳,武汉-c++.xls"
data = xlrd.open_workbook(file_location)
# First worksheet of the workbook; column 2 of this sheet is read further down as the salary text.
sheet = data.sheet_by_index(0)
# The worksheet named '51job'; `table` is never read in the visible code.
table = data.sheet_by_name('51job')
def numberstr2num(num_str):
    """Convert a plain decimal string such as ``"12"``, ``"-3.5"`` or ``"+0.8"``.

    The whole string must be an optionally signed run of digits with an
    optional ``.digits`` fraction. Anything else (empty string, trailing text
    such as ``"12abc"``, exponents, surrounding whitespace) is rejected.

    :param num_str: text to convert; must be a ``str``.
    :return: an ``int`` when there is no fractional part, a ``float`` when
        there is one, or ``None`` when ``num_str`` is not a valid number.
    :raises AssertionError: if ``num_str`` is not a ``str``.
    """
    import re
    assert isinstance(num_str, str)
    # fullmatch rather than match: a prefix-only match lets trailing garbage
    # through, which would then be decoded as if it were digits.
    matched = re.fullmatch(r'([-+]?)(\d+)(?:\.(\d+))?', num_str)
    if matched is None:
        return None
    sign, int_part, frac_part = matched.groups()
    if frac_part is None:
        magnitude = int(int_part)
    else:
        # Let float() do the correctly rounded conversion instead of summing
        # digit / 10**k terms, which accumulates rounding error.
        magnitude = float(int_part + '.' + frac_part)
    return 0 - magnitude if sign == '-' else magnitude


def str2num(num_str):
    """Turn a string of decimal digits into the integer it spells.

    The caller is expected to have validated the text already, so no
    checking is done here; an empty string yields 0.

    :param num_str: digit characters, most significant first.
    :return: the integer value.
    """
    value = 0
    for digit_char in num_str:
        # Horner's scheme: shift what we have one decimal place, add the digit.
        value = value * 10 + (ord(digit_char) - 48)
    return value


def pointstr2num(point_str):
    """Turn the digits after a decimal point into their fractional value.

    The caller is expected to have validated the text already, so no
    checking is done here; an empty string yields 0.

    :param point_str: digit characters following the decimal point.
    :return: the fraction they represent, e.g. ``"5"`` gives ``0.5``.
    """
    fraction = 0
    for place, digit_char in enumerate(point_str, start=1):
        # The digit at 1-based position `place` is worth digit / 10**place.
        fraction += (ord(digit_char) - 48) / 10 ** place
    return fraction
def change(s):
    """Convert a salary string into an average monthly salary in yuan.

    Recognised suffixes are 万/月, 千/月, 万/年, 千/年, 万/天, 千/天 and 元/天.
    The text before the suffix is a single number or a ``low-high`` range;
    each part is converted to yuan per month and the parts are averaged.
    A year counts as 12 months and a month as 30 days.

    :param s: salary text, e.g. ``'0.8-1.6万/月'``.
    :return: a one-element list holding the monthly average, or ``[0]`` when
        ``s`` is empty, is not a string, or is not in a recognised format.
    """
    # suffix -> (multiplier, divisor) that turns one quoted unit into yuan/month
    conversions = {
        '万/月': (10000, 1),
        '千/月': (1000, 1),
        # yearly pay is spread over 12 months so it lands on the same monthly
        # scale as every other suffix
        '万/年': (10000, 12),
        '千/年': (1000, 12),
        '万/天': (10000 * 30, 1),
        '千/天': (1000 * 30, 1),
        '元/天': (30, 1),
    }
    if not isinstance(s, str):
        return [0]
    # Every supported suffix is exactly three characters long; shorter or
    # unrelated strings simply miss the lookup.
    rule = conversions.get(s[-3:])
    if rule is None:
        return [0]
    multiplier, divisor = rule

    total = 0
    count = 0
    for part in s[:-3].split('-'):
        try:
            amount = float(part)
        except ValueError:
            # e.g. an empty part or stray text before the suffix
            return [0]
        total += amount * multiplier / divisor
        count += 1
    # split() always yields at least one part, so count is never 0 here
    return [total / count]
# Example: change('0.8-1.6万/月') yields a one-element list with the monthly average.
from pyecharts import options as opts
from pyecharts.charts import Bar

# Inclusive upper bounds (yuan/month) of every salary bucket except the
# open-ended last one ("50000以上").
_BUCKET_UPPER_BOUNDS = (5000, 10000, 20000, 30000, 40000, 50000)


def _count_salary_buckets(raw_salaries, echo=False):
    """Tally salary strings into the seven monthly-salary buckets of the chart.

    :param raw_salaries: iterable of salary strings accepted by ``change``.
    :param echo: when true, print every converted salary (debug output).
    :return: list of seven counts, ordered like the labels in ``l2``.
    """
    counts = [0] * (len(_BUCKET_UPPER_BOUNDS) + 1)
    for raw in raw_salaries:
        monthly = int(change(raw)[0])
        if echo:
            print(monthly)
        if monthly == 0:
            # change() reports salaries it cannot parse as 0; leave them out
            continue
        for position, upper in enumerate(_BUCKET_UPPER_BOUNDS):
            if monthly <= upper:
                counts[position] += 1
                break
        else:
            # above the last bound: the open-ended bucket
            counts[-1] += 1
    return counts


# C++ listings: read from the sheet that is already open at this point.
days = [sheet.cell_value(r, 2) for r in range(1, sheet.nrows)]
print(days)
l1 = _count_salary_buckets(days, echo=True)
sum1, sum2, sum3, sum4, sum5, sum6, sum7 = l1

# Java listings.
file_location = "51job工作列表-成都-java.xls"
data = xlrd.open_workbook(file_location)
sheet = data.sheet_by_index(0)
table = data.sheet_by_name('51job')
days1 = [sheet.cell_value(r, 2) for r in range(1, sheet.nrows)]
ll1 = _count_salary_buckets(days1)
c1, c2, c3, c4, c5, c6, c7 = ll1

# Python listings.
file_location = "51job工作列表-成都python.xls"
data = xlrd.open_workbook(file_location)
sheet = data.sheet_by_index(0)
table = data.sheet_by_name('51job')
days2 = [sheet.cell_value(r, 2) for r in range(1, sheet.nrows)]
ll11 = _count_salary_buckets(days2)
cc1, cc2, cc3, cc4, cc5, cc6, cc7 = ll11

l2 = ['0-5000', '5000-10000', '10000-20000', '20000-30000', '30000-40000', '40000-50000', '50000以上']
bar = Bar()
# x axis: the salary buckets
bar.add_xaxis(l2)
# y axes: one series per language. ll1 holds the Java counts and ll11 the
# Python counts, so each series is paired with the matching label.
bar.add_yaxis("c++", l1)
bar.add_yaxis("java", ll1)
bar.add_yaxis("python", ll11)
# chart title
bar.set_global_opts(title_opts=opts.TitleOpts(title="热门语言"))
# the argument is the name of the generated HTML file
bar.render('E:热门语言工资大概状况.html')

看内里：

并不是说只有这几种语言比较热门，只是我们学校大多数人学的就是这些，所以就分析了这三种。

学其他编程语言的可以拿这份代码去分析你想要了解的语言的情况，这里就不多说了

用饼图分析数据：

# Pie chart of the values found in column 5 of `sheet`, shown as percentages per education level.
# NOTE(review): c1..c6 are incremented here without being reset, and Pie,
# ThemeType, L1 and s1 are not defined in the visible code -- presumably this
# excerpt comes from a script that initialises/imports them; confirm before running.
dayss = [sheet.cell_value(r, 5) for r in range(1, sheet.nrows)]
for i in dayss:
    # c6 counts every row; c1..c4 count the four named degrees, c5 everything else
    c6+=1
    if i=="本科":
        c1+=1
    elif i=="大专":
        c2+=1
    elif i=="硕士":
        c3+=1
    elif i=="博士":
        c4+=1
    else:
        c5+=1
# print(c4)
# Share of each category in percent, rounded to 3 decimals.
# Raises ZeroDivisionError if c6 is 0 at this point.
num = [round(c1/c6*100,3),round(c2/c6*100,3),round(c3/c6*100,3),round(c4/c6*100,3),round(c5/c6*100,3)]
# for i in num:
#     print(i)
c = Pie(init_opts=opts.InitOpts(theme=ThemeType.LIGHT))
# Pairs each entry of L1 with the percentage at the same position
# (presumably L1 holds the category labels in c1..c5 order -- TODO confirm).
c.add("",[list(z) for z in zip(L1,num)])
# The second element of s1 is used as the title prefix.
w=s1[1]
c.set_global_opts(title_opts = opts.TitleOpts(title="%s-学位概况"%w),
                 toolbox_opts = opts.ToolboxOpts(is_show=True))
c.set_series_opts(label_opts = opts.LabelOpts(formatter='{b}:{c}%'))
c.render("学位占比.html")

再来看看几个城市的程序岗位分布：

# Count the rows of `sheet` per city (first two characters of column 7)
# and plot the five counts on a map of China.
l = ["北京", "上海", "深圳", "武汉", "广州"]
days = [sheet.cell_value(r, 7) for r in range(1, sheet.nrows)]

tally = dict.fromkeys(l, 0)
for location in days:
    city = location[0:2]
    # rows for any other city are ignored
    if city in tally:
        tally[city] += 1

# Keep the individual counters available under their original names.
c1, c2, c3, c4, c5 = (tally[city] for city in l)
v = [c1, c2, c3, c4, c5]

c = Geo()
c.add_schema(maptype="china")
c.add("geo", [[city, tally[city]] for city in l])
c.set_series_opts(label_opts=opts.LabelOpts(is_show=False))
c.set_global_opts(
    visualmap_opts=opts.VisualMapOpts(),
    title_opts=opts.TitleOpts(title="全国各省程序员工作地方数据分布"),
)
c.render("E:分布地图.html")

点地图上的小红点可以看数据哦

再来个上面地区的词云耍耍：

# Word cloud built from the (name, value) pairs; the job and place used in the
# title and output file name are taken from the '-'-separated pieces of `s`.
words = list(zip(list(name), list(value)))
word = WordCloud()
word.add("", words, word_size_range=[20, 200])

pieces = s.split("-")
work = pieces[2].replace('.xls', '')
place = pieces[1].replace('.xls', '')
# a place of "1" is displayed as 全国
if place == "1":
    place = "全国"

cloud_title = "%s在%s岗位分布词云图:" % (work, place)
word.set_global_opts(title_opts=opts.TitleOpts(title=cloud_title))
word.render("%s在%s工作岗位分布.html" % (work, place))