Python量化分析：人力资源数据分析

joker_data_pink

已于 2022-12-21 20:43:37 修改

阅读量1.8k

点赞数 5

文章标签： python、数据分析、开发语言、编辑器、数据挖掘

于 2022-12-21 20:41:03 首次发布

本文链接：https://blog.csdn.net/joker_man1/article/details/128401500

版权

本文通过Python对职位需求数据进行了深入分析，包括各城市招聘人数统计、职位工作年限平均值、大类职位招聘总数、发布年份和月份的需求趋势等。通过数据清洗和可视化，揭示了不同维度的人力资源市场状况，有助于理解行业动态。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

量化分析

人力资源数据分析

一、题目：

国内不同地点的招聘人数总数

不同类别岗位的平均工作年限分析

不同类别（大类、小类）的职位招聘人数总数，如“技术>测试”表示大类是技术，小类是测试

不同年份/月份发布的职位需求人数分析

不同地点不同大类职位类别的职位招聘人数总数

描述上述1-6数据分析过程中你遇到了哪些问题以及解决方法

二、要求

用Python进行数据分析，分析职位需求数据，并且进行数据清洗及各种维度的数据分析及可视化

三、可视化示例

（图片：在这里插入图片描述）这里只给大家简单举两个例子，上面的题目基本上都可以在下面的代码中一一实现。

四、代码展示

import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Task 1: total number of openings per work location.
df = pd.read_excel('job_data.xlsx', header=0)  # load the job postings table
print(df)

# Distinct work locations, in the order value_counts() returns them.
city_list = df['工作地点'].value_counts().index.tolist()
print(city_list)

# For every location, add up the head-count column of its postings.
city_job = [
    sum(df.loc[df['工作地点'] == city, '招聘人数'])
    for city in city_list
]
print(city_job)

# Bar chart: one bar per location, annotated with its total.
plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render the Chinese labels
plt.rcParams['axes.unicode_minus'] = False
plt.title("不同城市招聘人数")
plt.xlabel("城市")
plt.ylabel("人数 /人")
x = np.arange(len(city_list))
plt.xticks(x, city_list, fontsize=10)
plt.bar(x, city_job, color='g')
for bar_x, total in zip(x, city_job):
    plt.text(bar_x, total, total)
plt.show()
# Task 2: average required work experience (in years) per job category.
def _min_years(requirement):
    """Return the lower bound, in whole years, of an experience requirement.

    '不限' maps to 0 and '10年以上' maps to 10.  For any other string the
    text before the first '-' is parsed as an integer.  Values that are
    already integers are returned unchanged, so converting an
    already-converted column is harmless.
    """
    if isinstance(requirement, (int, np.integer)):
        return int(requirement)
    if requirement == '不限':
        return 0
    if requirement == '10年以上':
        return 10
    return int(requirement.split('-')[0])


# Distinct job categories, in the order value_counts() returns them.
job_class = df['职位类别'].value_counts().keys()

# Replace the textual requirement with its numeric lower bound in one pass
# instead of assigning cell by cell.
df['工作年限'] = df['工作年限'].map(_min_years)
print(list(job_class))

# Mean lower-bound experience of the postings in each category, 1 decimal.
job_class_worktime = []
for job in job_class:
    years = df.loc[df['职位类别'] == job, '工作年限']
    job_class_worktime.append(round(sum(years) / len(years), 1))
print(job_class_worktime)

# Bar chart: one bar per category, annotated with its average.
plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render the Chinese labels
plt.rcParams['axes.unicode_minus'] = False
plt.title("不同岗位平均工作年限")
plt.xlabel("工作岗位")
plt.ylabel("工作年限 /年")
x = np.arange(len(job_class_worktime))
plt.xticks(x, job_class, fontsize=10, rotation=90)
plt.bar(x, job_class_worktime, color='b')
for bar_x, average in zip(x, job_class_worktime):
    plt.text(bar_x, average, average)
plt.show()

# Text report of the same averages.
for job, average in zip(job_class, job_class_worktime):
    print(f'岗位 {job} 平均工作年限为 {average}年')


# Task 3: total openings per top-level job category.
# A category label such as "技术>测试" is split on '>' and the first part is
# used as the top-level category.

# Total openings for every full category label, aligned with job_class.
job_class_recrutment = [
    sum(df.loc[df['职位类别'] == job, '招聘人数'])
    for job in job_class
]

# Set of distinct top-level categories.
job_class_big = {job.split('>')[0] for job in job_class}
print(job_class_big)

# top-level category -> total openings.
# Each label is credited to the exact text before '>' rather than to the
# first top-level name that happens to occur somewhere inside the label, so
# the totals do not depend on set iteration order or on sub-category wording.
job_class_big_recrutment = dict.fromkeys(job_class_big, 0)
for job, total in zip(job_class, job_class_recrutment):
    job_class_big_recrutment[job.split('>')[0]] += total

print(job_class_big_recrutment)

# Bar chart: one bar per top-level category, annotated with its total.
big_names = list(job_class_big_recrutment)
big_totals = list(job_class_big_recrutment.values())
plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render the Chinese labels
plt.rcParams['axes.unicode_minus'] = False
plt.title("大类工作岗位招聘人数")
plt.xlabel("工作岗位")
plt.ylabel("招聘人数 /人")
x = np.arange(len(big_names))
plt.xticks(x, big_names, fontsize=10, rotation=90)
plt.bar(x, big_totals, color='y')
for bar_x, total in zip(x, big_totals):
    plt.text(bar_x, total, total)
plt.show()


# Task 4: openings by publication year and by publication month.
# The publication date string is split on '-' into three parts.
df1 = df['发布时间'].str.split('-', expand=True)
df1.columns = ['年', '月', '日']
print(df1)
for part in ('年', '月'):
    df[part] = df1[part]

# Distinct years / months, in the order value_counts() returns them.
year_recrutment = df['年'].value_counts()
month_recrutment = df['月'].value_counts()

# Total openings of the postings published in each year.
year_recrutment_people = [
    sum(df.loc[df['年'] == year, '招聘人数'])
    for year in year_recrutment.index
]

# Total openings of the postings published in each month.
month_recrutment_people = [
    sum(df.loc[df['月'] == month, '招聘人数'])
    for month in month_recrutment.index
]

print(year_recrutment_people)
print(month_recrutment_people)

# Two bar charts with the same layout: first per year, then per month.
charts = (
    ("年 招聘人数", "年份", year_recrutment.index, year_recrutment_people, 'y'),
    ("月 招聘人数", "月份", month_recrutment.index, month_recrutment_people, 'lightgreen'),
)
for chart_title, axis_label, tick_labels, totals, bar_color in charts:
    plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render the Chinese labels
    plt.rcParams['axes.unicode_minus'] = False
    plt.title(chart_title)
    plt.xlabel(axis_label)
    plt.ylabel("招聘人数 /人")
    x = np.arange(len(tick_labels))
    plt.xticks(x, tick_labels, fontsize=10, rotation=90)
    plt.bar(x, totals, color=bar_color)
    for bar_x, total in zip(x, totals):
        plt.text(bar_x, total, total)
    plt.show()


# Task 5: total openings per work location and top-level job category.

# Fix one ordering of the top-level categories so that dictionary keys,
# x positions and tick labels all line up.
big_names = list(job_class_big)

# Top-level category of every posting: the exact text before '>'.
# Matching on the exact prefix avoids crediting a posting to another
# category whose name merely occurs somewhere inside the label.
top_level = df['职位类别'].str.split('>').str[0]

# city -> {top-level category -> total openings}
city_job_class_big = {}
for city in city_list:
    in_city = df['工作地点'] == city
    totals_by_big = dict.fromkeys(big_names, 0)
    for big, people in zip(top_level[in_city], df.loc[in_city, '招聘人数']):
        # Postings whose category is not one of the known top-level
        # categories are left out, as before.
        if big in totals_by_big:
            totals_by_big[big] += people
    city_job_class_big[city] = totals_by_big
print(city_job_class_big)

color_list = ['#CD853F', '#DC143C', '#00FF7F', '#FF6347', '#8B008B', '#00FFFF', '#0000FF', '#8B0000', '#FF8C00',
              '#1E90FF', '#00FF00', '#FFD700', '#008080', '#008B8B', '#8A2BE2', '#228B22', '#FA8072', '#808080']

# Line chart: one line per city across the top-level categories, every point
# annotated with its value.
plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render the Chinese labels
plt.rcParams['axes.unicode_minus'] = False
plt.title("不同地点大类招聘人数")
plt.xlabel("工作岗位大类")
plt.ylabel("招聘总人数 /人")
x = np.arange(len(big_names))
plt.xticks(x, big_names, fontsize=10, rotation=90)
for position, city in enumerate(city_list):
    city_totals = [city_job_class_big[city][name] for name in big_names]
    # Cycle through the palette so more cities than colours does not fail.
    line_color = color_list[position % len(color_list)]
    plt.plot(city_totals, color=line_color, label=city)
    for point_x, total in zip(x, city_totals):
        plt.text(point_x, total, total)

plt.legend()
plt.show()

# Task 6: distribution of education requirements within each top-level
# job category (number of postings per education level).
education_spread = df['学历要求'].value_counts()

# Fix one ordering for education levels and categories so that dictionary
# keys, x positions and tick labels all line up.
education_levels = list(education_spread.keys())
big_names = list(job_class_big)

# top-level category -> {education level -> number of postings}
education_job_class_big = {
    name: dict.fromkeys(education_levels, 0) for name in big_names
}

# Top-level category of every posting: the exact text before '>'.
# Matching on the exact prefix avoids crediting a posting to another
# category whose name merely occurs somewhere inside the label.
top_level = df['职位类别'].str.split('>').str[0]
for big, education in zip(top_level, df['学历要求']):
    counts = education_job_class_big.get(big)
    # Skip postings whose category or education level is not among the
    # known keys (e.g. missing values, which value_counts() leaves out).
    if counts is not None and education in counts:
        counts[education] += 1
print(education_job_class_big)


color_list = ['#CD853F', '#DC143C', '#00FF7F', '#FF6347', '#8B008B', '#00FFFF', '#0000FF', '#8B0000', '#FF8C00',
              '#1E90FF', '#00FF00', '#FFD700', '#008080', '#008B8B', '#8A2BE2', '#228B22', '#FA8072', '#808080']

# Line chart: one line per top-level category across the education levels,
# every point annotated with its value.
plt.rcParams['font.sans-serif'] = ['SimHei']  # font able to render the Chinese labels
plt.rcParams['axes.unicode_minus'] = False
plt.title("不同大类工作学历要求分布")
plt.xlabel("学历要求")  # the x ticks are education levels, not job categories
plt.ylabel("学历要求 /次 ")
x = np.arange(len(education_levels))
plt.xticks(x, education_levels, fontsize=10, rotation=90)
for position, name in enumerate(big_names):
    level_counts = [education_job_class_big[name][level] for level in education_levels]
    # Cycle through the palette so more categories than colours does not fail.
    line_color = color_list[position % len(color_list)]
    plt.plot(level_counts, color=line_color, label=name)
    for point_x, count in zip(x, level_counts):
        plt.text(point_x, count, count)

plt.legend()
plt.show()