# -*- coding: utf-8 -*-
"""
Created on Wed Nov 1 20:15:56 2019

@author: loo
"""
7
8 importmatplotlib.pyplot as plt9 importcsv10 importnumpy as np11 importre12 from wordcloud importWordCloud,STOPWORDS13
14
def readFile():
    """Load the cleaned job listings from ``cleaned_51jobs.csv``.

    The file is opened relative to the current working directory and decoded
    as GBK. The first row is treated as the column header and discarded;
    blank rows are skipped.

    Returns:
        A 5-tuple ``(data, jobName, locality, minSalary, maxSalary)``.
        ``data`` is the list of data rows (each a list of strings); the other
        four are NumPy string arrays holding columns 0, 1, 2 and 3 of those
        rows. When the file contains no data rows, ``data`` is an empty list
        and the four arrays are empty.

    Raises:
        OSError: if the file cannot be opened.
        UnicodeDecodeError: if the content is not valid GBK.
    """
    # newline='' lets the csv module do its own newline handling, as its
    # documentation requires for files passed to csv.reader.
    with open("cleaned_51jobs.csv", encoding='gbk', newline='') as f:
        csv_reader = csv.reader(f)
        # Skip the header row; the default keeps an empty file from raising
        # StopIteration.
        next(csv_reader, None)
        # csv.reader yields [] for blank lines; dropping them keeps the table
        # rectangular so it can be sliced by column below.
        data = [row for row in csv_reader if row]

    if not data:
        # np.array([]) is one-dimensional, so the column slicing used for the
        # normal case would raise IndexError. Return empty columns instead.
        return (
            data,
            np.array([], dtype=str),
            np.array([], dtype=str),
            np.array([], dtype=str),
            np.array([], dtype=str),
        )

    # A 2-D array makes it easy to pull out whole columns at once.
    nd_data = np.array(data)
    jobName = nd_data[:, 0]
    locality = nd_data[:, 1]
    minSalary = nd_data[:, 2]
    maxSalary = nd_data[:, 3]
    return data, jobName, locality, minSalary, maxSalary
33
34
def _city_job_stats(data):
    """Return ``{city: (job_count, mean_salary)}`` for the given job rows.

    Each row's salary is taken as the midpoint of its minimum (column 2) and
    maximum (column 3) salary; the city is column 1.
    """
    totals = {}
    for job in data:
        loc = job[1]
        min_sa, max_sa = float(job[2]), float(job[3])
        count, salary_sum = totals.get(loc, (0, 0.0))
        # Midpoint of the salary range: (min + max) / 2.
        totals[loc] = (count + 1, salary_sum + (min_sa + max_sa) / 2)
    return {
        loc: (count, salary_sum / count)
        for loc, (count, salary_sum) in totals.items()
    }


def _draw_city_bars(position, title, ylabel, cities, values, color):
    """Draw one bar-chart panel with one bar per city at subplot *position*."""
    # Size the x positions from the data so fewer than 20 cities still plot.
    x = np.arange(len(cities))
    plt.subplot(position)
    plt.title(title)
    plt.xlabel(u"城市")
    plt.ylabel(ylabel)
    plt.xticks(x, cities)  # label each bar with its city name
    plt.bar(x, values, 0.8, color=color)


def salary_locality(data):
    """Plot the top-20 cities by job count and by average salary.

    Args:
        data: iterable of job rows; each row is indexable with the city at
            index 1 and the minimum / maximum salary (anything ``float()``
            accepts) at indexes 2 and 3.

    Returns:
        None. Opens a matplotlib figure with two stacked bar charts: the 20
        cities with the most jobs (top) and the 20 cities with the highest
        average salary (bottom). Fewer bars are drawn when the data holds
        fewer than 20 distinct cities.

    Raises:
        ValueError: if a salary field cannot be converted to ``float``.
    """
    top_n = 20
    city_stats = _city_job_stats(data)

    # Rank by (job_count, mean_salary) so ties on count fall back to salary.
    by_count = sorted(
        city_stats.items(), key=lambda item: item[1], reverse=True
    )[:top_n]
    by_salary = sorted(
        city_stats.items(), key=lambda item: item[1][1], reverse=True
    )[:top_n]

    # Use a font with CJK glyphs and keep the minus sign renderable with it.
    plt.rcParams['font.sans-serif'] = ['SimHei']
    plt.rcParams['axes.unicode_minus'] = False

    plt.figure(figsize=(13, 11))

    _draw_city_bars(
        211,
        '51job——大数据职位数前20名城市',
        u"职位数",
        [city for city, _ in by_count],
        [stats[0] for _, stats in by_count],
        'salmon',
    )
    _draw_city_bars(
        212,
        '51job——大数据职位平均薪资的前20名城市',
        u"平均薪资(千元/月)",
        [city for city, _ in by_salary],
        [stats[1] for _, stats in by_salary],
        'orchid',
    )

    # No plt.legend(): none of the bars carries a label, so there is nothing
    # for a legend to show.
    # To save the figure instead: plt.savefig('city.jpg', dpi=500)
    plt.show()
114
def jobTitle(jobName):
    """Show a word cloud built from the job titles.

    ASCII letters, digits and a fixed set of punctuation characters are
    replaced by spaces first, so only the remaining (Chinese) text
    contributes to the cloud. A list of recruiting boilerplate words and a
    few city names is excluded via stopwords.

    Args:
        jobName: iterable of job-title strings.

    Returns:
        None. Displays the word cloud in a matplotlib window.
    """
    # Join with a space, matching localityWordCloud. With an empty separator
    # adjacent titles are fused into one unbroken run of characters.
    # NOTE(review): presumably WordCloud tokenises on non-word characters, so
    # fused titles would be counted as a single word — confirm against the
    # wordcloud documentation.
    word = " ".join(jobName)

    # Font file used to render Chinese glyphs.
    font = 'simkai.ttf'

    # Blank out ASCII letters, digits and punctuation. A raw string avoids
    # the invalid-escape warnings of an ordinary string literal.
    resultword = re.sub(
        r"[A-Za-z0-9\[\]`~!@#$^&*()=|{}':;,./?。\\%\-]", " ", word
    )

    # Stopwords: the library defaults plus recruiting boilerplate.
    sw = set(STOPWORDS)
    sw.update((
        "高提成", "底薪", "五险", "双休",
        "五险一金", "社保", "上海", "广州",
        "无责底薪", "月薪", "急聘", "急招",
        "资深", "包吃住", "周末双休", "代招",
        "高薪", "高底薪", "校招", "月均",
        "可实习", "年薪", "北京", "经理",
        "包住", "应届生", "南京", "专员",
        "提成", "方向",
    ))

    my_wordcloud = WordCloud(
        font_path=font,
        stopwords=sw,
        scale=4,
        background_color='white',
        max_words=100,
        max_font_size=60,
        random_state=20,
    ).generate(resultword)

    # Display the generated cloud without axes.
    plt.imshow(my_wordcloud)
    plt.axis("off")
    plt.show()

    # To save the image instead: my_wordcloud.to_file('title.jpg')
148
149
def localityWordCloud(locality):
    """Show a word cloud built from the job localities.

    Args:
        locality: iterable of locality strings; they are joined with single
            spaces before being handed to WordCloud.

    Returns:
        None. Displays the word cloud in a matplotlib window.
    """
    joined_places = " ".join(locality)

    # Rendering options, including the font file used for Chinese glyphs.
    cloud_options = dict(
        font_path='simkai.ttf',
        scale=4,
        background_color='white',
        max_words=100,
        max_font_size=60,
        random_state=20,
    )
    place_cloud = WordCloud(**cloud_options).generate(joined_places)

    # Display the generated cloud without axes.
    plt.imshow(place_cloud)
    plt.axis("off")
    plt.show()

    # To save the image instead: place_cloud.to_file('place.jpg')
165
166
def main():
    """Load the cleaned listings and run the three analyses in order."""
    # readFile returns the raw rows plus four column arrays; the two salary
    # columns are not needed here because salary_locality reads the rows.
    rows, titles, places, _min_salary, _max_salary = readFile()

    salary_locality(rows)
    jobTitle(titles)
    localityWordCloud(places)
175
# Run the analysis only when this file is executed as a script.
if __name__ == "__main__":
    main()