量化分析
人力资源数据分析
一、题目:
1. 国内不同地点的招聘人数总数
2. 不同类别岗位的平均工作年限分析
3. 不同类别(大类、小类)的职位招聘人数总数,如“技术>测试”表示大类是“技术”,小类是“测试”
4. 不同年份/月份发布的职位需求人数分析
5. 不同地点不同大类职位类别的职位招聘人数总数
6. 不同大类职位类别的学历要求分布
7. 描述上述1-6数据分析过程中你遇到了哪些问题以及解决方法
二、要求
用Python进行数据分析,分析职位需求数据,并且进行数据清洗及各种维度的数据分析及可视化
三、可视化示例
这里简单举两个例子;上面的题目基本都可以在下面的代码中一一实现。
四、代码展示
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
# Question 1 (题目1): total number of openings per work location, shown as a
# bar chart with the total written above each bar.
df = pd.read_excel('job_data.xlsx', header=0)  # row 0 of the sheet holds the column names
print(df)

# Distinct work locations, in the order value_counts() returns them.
city_list = list(df['工作地点'].value_counts().keys())
print(city_list)

# One summed head count per location, aligned position-by-position with city_list.
city_job = [sum(df.loc[df['工作地点'] == place, '招聘人数']) for place in city_list]
print(city_job)

# Chart set-up: SimHei is selected so the Chinese labels can be rendered.
plt.rcParams['font.sans-serif'] = ['SimHei'];
plt.rcParams['axes.unicode_minus'] = False
plt.ylabel("人数 /人")
plt.xlabel("城市")
plt.title("不同城市招聘人数")

x = np.arange(len(city_list))
plt.xticks(x, city_list, fontsize=10)
plt.bar(x, city_job, color='g')
# Annotate every bar with its own value.
for bar_pos, bar_total in zip(x, city_job):
    plt.text(bar_pos, bar_total, bar_total)
plt.show()
# Question 2 (题目2): average required years of experience per job category.
job_class = df['职位类别'].value_counts().keys()  # distinct categories, in value_counts() order
print(list(job_class))

# Normalise the free-text experience column to a number of years in one
# vectorised pass:
#   * '不限' (no requirement) counts as 0;
#   * any other value contributes its first run of digits, so '3-5年' -> 3 and
#     '10年以上' -> 10;
#   * values without any digit become NaN and are left out of the averages
#     instead of aborting the script.
# Going through str also makes this step safe to run again after the column
# has already been converted to numbers.
experience_text = df['工作年限'].astype(str).str.strip()
experience_years = pd.to_numeric(
    experience_text.str.extract(r'(\d+)', expand=False), errors='coerce'
)
df['工作年限'] = experience_years.mask(experience_text == '不限', 0)

# Mean experience per category (NaN rows are skipped by mean()), rounded to
# one decimal and aligned position-by-position with job_class.
mean_years = df.groupby('职位类别')['工作年限'].mean()
job_class_worktime = [round(float(mean_years[job]), 1) for job in job_class]
print(job_class_worktime)

plt.rcParams['font.sans-serif'] = ['SimHei']  # font that can render the Chinese labels
plt.rcParams['axes.unicode_minus'] = False
plt.title("不同岗位平均工作年限")
plt.xlabel("工作岗位")
plt.ylabel("工作年限 /年")
x = np.arange(len(job_class_worktime))
plt.xticks(x, list(job_class), fontsize=10, rotation=90)
plt.bar(x, job_class_worktime, color='b')
# Write each average above its bar.
for bar_pos, average in zip(x, job_class_worktime):
    plt.text(bar_pos, average, average)
plt.show()

# Text report of the same averages.
for job, average in zip(job_class, job_class_worktime):
    print(f'岗位 {job} 平均工作年限为 {average}年')
# Question 3 (题目3): total openings per job category, rolled up to the major
# class. A category is written as 'major>minor'; the major class is the text
# before the first '>'.

# Total openings per full category, aligned position-by-position with job_class.
headcount_by_class = df.groupby('职位类别')['招聘人数'].sum()
job_class_recrutment = [headcount_by_class[job] for job in job_class]

# Distinct major classes in first-seen order. A list (not a set) keeps the
# chart order identical from run to run.
job_class_big = list(dict.fromkeys(job.split('>')[0] for job in job_class))
print(job_class_big)

# Roll the per-category totals up to their major class. The major class is
# taken from the category's own prefix, not found by substring search, so a
# major name that happens to appear inside another category cannot be
# credited with that category's openings.
job_class_big_recrutment = dict.fromkeys(job_class_big, 0)
for job, headcount in zip(job_class, job_class_recrutment):
    job_class_big_recrutment[job.split('>')[0]] += headcount
print(job_class_big_recrutment)

# Materialise labels and totals once so ticks, bars and annotations all use
# the same ordering.
major_names = list(job_class_big_recrutment)
major_totals = list(job_class_big_recrutment.values())

plt.rcParams['font.sans-serif'] = ['SimHei']  # font that can render the Chinese labels
plt.rcParams['axes.unicode_minus'] = False
plt.title("大类工作岗位招聘人数")
plt.xlabel("工作岗位")
plt.ylabel("招聘人数 /人")
x = np.arange(len(major_names))
plt.xticks(x, major_names, fontsize=10, rotation=90)
plt.bar(x, major_totals, color='y')
# Write each total above its bar.
for bar_pos, total in zip(x, major_totals):
    plt.text(bar_pos, total, total)
plt.show()
# Question 4 (题目4): openings per publication year and per publication month.


def _plot_headcount_bars(title, xlabel, labels, totals, color):
    """Draw and show one bar chart of openings, annotating each bar with its total.

    title, xlabel: chart title and x-axis caption.
    labels: tick labels, one per bar.
    totals: bar heights, aligned position-by-position with labels.
    color: bar colour passed to plt.bar.
    """
    plt.rcParams['font.sans-serif'] = ['SimHei']  # font that can render the Chinese labels
    plt.rcParams['axes.unicode_minus'] = False
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel("招聘人数 /人")
    positions = np.arange(len(labels))
    plt.xticks(positions, labels, fontsize=10, rotation=90)
    plt.bar(positions, totals, color=color)
    for bar_pos, total in zip(positions, totals):
        plt.text(bar_pos, total, total)
    plt.show()


# Pull the first three numeric fields (year, month, day) out of the
# publication date. Converting through str first means this works whether the
# column holds text such as '2021-03-05' or real datetime cells; rows with no
# recognisable date become <NA> and are ignored by the aggregations below.
date_parts = df['发布时间'].astype(str).str.extract(r'(\d+)\D+(\d+)\D+(\d+)')
date_parts.columns = ['年', '月', '日']
df1 = date_parts.apply(pd.to_numeric).astype('Int64')
print(df1)
df['年'] = df1['年']
df['月'] = df1['月']

# Number of postings per year / month, sorted chronologically so the charts
# read left to right in time order rather than by posting frequency.
year_recrutment = df['年'].value_counts().sort_index()
month_recrutment = df['月'].value_counts().sort_index()

# Total openings per year / month, aligned with the sorted keys above.
year_recrutment_people = (
    df.groupby('年')['招聘人数'].sum().reindex(year_recrutment.index).tolist()
)
month_recrutment_people = (
    df.groupby('月')['招聘人数'].sum().reindex(month_recrutment.index).tolist()
)
print(year_recrutment_people)
print(month_recrutment_people)

_plot_headcount_bars(
    "年 招聘人数", "年份",
    [str(year) for year in year_recrutment.index],
    year_recrutment_people, 'y',
)
_plot_headcount_bars(
    "月 招聘人数", "月份",
    [str(month) for month in month_recrutment.index],
    month_recrutment_people, 'lightgreen',
)
# Question 5 (题目5): total openings per work location, split by major job
# class, drawn as one line per location.

# Major class of every row = text before the first '>' in 职位类别. It is
# compared exactly (not by substring), so a major name that appears inside
# another category cannot absorb that category's openings.
major_of_row = df['职位类别'].str.split('>').str[0]
major_labels = list(job_class_big)  # one fixed order shared by dicts, ticks and lines

# Sum openings for every (location, major class) pair in a single grouped
# pass; pairs with no postings are reported as 0.
headcount = df.groupby([df['工作地点'], major_of_row])['招聘人数'].sum()
city_job_class_big = {
    city: {major: headcount.get((city, major), 0) for major in major_labels}
    for city in city_list
}
print(city_job_class_big)

color_list=['#CD853F','#DC143C','#00FF7F','#FF6347','#8B008B','#00FFFF','#0000FF','#8B0000','#FF8C00',
'#1E90FF','#00FF00','#FFD700','#008080','#008B8B','#8A2BE2','#228B22','#FA8072','#808080']
plt.rcParams['font.sans-serif'] = ['SimHei']  # font that can render the Chinese labels
plt.rcParams['axes.unicode_minus'] = False
plt.title("不同地点大类招聘人数")
plt.xlabel("工作岗位大类")
plt.ylabel("招聘总人数 /人")
x = np.arange(len(major_labels))
plt.xticks(x, major_labels, fontsize=10, rotation=90)
for index, city in enumerate(city_list):
    # plot() is given a real list: a dict view is not a sequence it is
    # guaranteed to accept.
    city_totals = list(city_job_class_big[city].values())
    # Cycle through the palette so more locations than colours cannot raise IndexError.
    plt.plot(x, city_totals, color=color_list[index % len(color_list)], label=city)
    # Write each total next to its point.
    for point_pos, total in zip(x, city_totals):
        plt.text(point_pos, total, total)
plt.legend()
plt.show()
# Question 6 (题目6): distribution of education requirements within each major
# job class, drawn as one line per major class.
education_spread = df['学历要求'].value_counts()
education_levels = list(education_spread.keys())  # x-axis order
major_labels = list(job_class_big)                # one line per major class

# Major class of every row = text before the first '>' in 职位类别, compared
# exactly rather than by substring.
major_of_row = df['职位类别'].str.split('>').str[0]

# Count postings for every (major class, education level) pair in one grouped
# pass. Rows with a missing category or education level are skipped by the
# grouping; pairs that never occur are reported as 0.
posting_counts = df.groupby([major_of_row, df['学历要求']]).size()
education_job_class_big = {
    major: {
        level: int(posting_counts.get((major, level), 0))
        for level in education_levels
    }
    for major in major_labels
}
print(education_job_class_big)

color_list=['#CD853F','#DC143C','#00FF7F','#FF6347','#8B008B','#00FFFF','#0000FF','#8B0000','#FF8C00',
'#1E90FF','#00FF00','#FFD700','#008080','#008B8B','#8A2BE2','#228B22','#FA8072','#808080']
plt.rcParams['font.sans-serif'] = ['SimHei']  # font that can render the Chinese labels
plt.rcParams['axes.unicode_minus'] = False
plt.title("不同大类工作学历要求分布")
plt.xlabel("学历要求")  # the ticks on this axis are education levels, not job classes
plt.ylabel("学历要求 /次 ")
x = np.arange(len(education_levels))
plt.xticks(x, education_levels, fontsize=10, rotation=90)
for index, major in enumerate(major_labels):
    # plot() is given a real list: a dict view is not a sequence it is
    # guaranteed to accept.
    level_counts = list(education_job_class_big[major].values())
    # Cycle through the palette so more classes than colours cannot raise IndexError.
    plt.plot(x, level_counts, color=color_list[index % len(color_list)], label=major)
    # Write each count next to its point.
    for point_pos, count in zip(x, level_counts):
        plt.text(point_pos, count, count)
plt.legend()
plt.show()
五、数据表格
六、总结
这个数据分析比较简单,大家仔细看一遍应该都能看懂;有什么问题可以随时联系我!
文案分享
“所有的爱都很可贵,但真诚和深情更胜一筹”