python爬取工资_python爬取拉勾网

又到了一年一度的招聘旺季,大量的工作机会在向我们招手。今天我和大家一起看看拉勾网上各公司对 Python 人才的需求。

import csv
import re
from collections import Counter

import jieba
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import requests
from imageio import imread
from pyecharts import Geo
from wordcloud import WordCloud

# Lagou AJAX endpoint that get_data() POSTs the search form to; the response is parsed as JSON.
url="https://www.lagou.com/jobs/positionAjax.json?needAddtionalResult=false"

def data(page, keyword="python"):
    """Build the form payload for one page of a Lagou position search.

    Args:
        page: Page number to request; sent as the string field ``pn``.
        keyword: Search keyword sent as ``kd``. Defaults to ``"python"``,
            which is what the script searched for originally.

    Returns:
        A dict of form fields suitable for the ``data=`` argument of the
        search POST request.
    """
    return {
        "first": "true",
        "pn": f"{page}",
        "kd": keyword,
        # NOTE(review): hard-coded session id, presumably captured from a
        # browser session and likely to expire -- confirm before reuse.
        'sid': '4256fece2141497bb5a8e1bfa69bcee7',
    }

def get_cookies():
    """Fetch the Lagou job-list page and return the cookies it sets.

    Returns:
        A dict mapping cookie names to values, taken from the response to a
        GET of the Python job-list page.
    """
    list_page = 'https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput='
    headers = {
        'referer': list_page,
        'authority': 'www.lagou.com',
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/80.0.3987.149 Safari/537.36',
    }
    # Without a timeout a stalled connection would hang the script forever.
    response = requests.get(list_page, headers=headers, timeout=10)
    return response.cookies.get_dict()

# Cookies are fetched once when the module runs and reused for every search request.
cookies = get_cookies()

# Browser-like request headers sent with each search POST.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.61 Safari/537.36',
    'host': 'www.lagou.com',
    'referer': 'https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput=',
}

def get_data(data):
    """POST one search request and append the returned positions to python.csv.

    Each position becomes one CSV row with the columns: city, company full
    name, company size, education, position name, salary, work experience,
    and the company labels concatenated into a single string.

    Args:
        data: Form payload for the search request (see ``data(page)``).

    Raises:
        KeyError: If the JSON response does not contain
            ``content -> positionResult -> result``.
    """
    response = requests.post(
        url=url, headers=headers, data=data, cookies=cookies, timeout=10
    )
    # JSON payload: the list of position records for this page.
    content = response.json()['content']['positionResult']['result']

    # Open the file once per page rather than once per row. newline='' hands
    # line-ending control to the csv writer, which is told to keep plain '\n'.
    with open('python.csv', 'a+', encoding='utf-8', newline='') as f:
        # csv.writer quotes fields that contain commas or quotes, so such a
        # field can no longer shift the remaining columns.
        writer = csv.writer(f, lineterminator='\n')
        for j, i in enumerate(content, start=1):
            # `or []` covers both an empty list and a null label list.
            labels = ''.join(i['companyLabelList'] or [])
            writer.writerow([
                i['city'],
                i['companyFullName'],
                i['companySize'],
                i['education'],
                i['positionName'],
                i['salary'],
                i['workYear'],
                labels,
            ])
            print(f'第{j}条数据成功')

if __name__ == '__main__':
    # Scrape result pages 1 through 10; each page is appended to python.csv.
    for i in range(1, 11):
        params = data(i)
        get_data(params)

# Global plot styling: the SimHei font (a Chinese typeface, so the Chinese
# category labels can be drawn), larger label/tick/legend text, and a large
# default figure.
matplotlib.rcParams['font.family'] = 'SimHei'
plt.rcParams['axes.labelsize'] = 16
plt.rcParams['xtick.labelsize'] = 14
plt.rcParams['ytick.labelsize'] = 14
plt.rcParams['legend.fontsize'] = 12
plt.rcParams['figure.figsize'] = [15, 9]

# Load the position table used by the analysis below.
# `encoding` is not a read_excel parameter (current pandas raises TypeError
# for it), so it has been dropped.
# NOTE(review): this rebinds the module-level name `data`, shadowing the
# data(page) payload builder from here on; the scraping loop has already run
# by this point. The workbook is presumably a hand-converted copy of
# python.csv with Chinese column headers -- confirm.
data = pd.read_excel(r'C:\Users\2020\Desktop\python2.xls')

# 1. Education: number of postings per required education level.
# Each chart gets its own figure and is shown before the next one starts;
# otherwise, when run as a script, all charts are drawn onto the same axes.
plt.figure()
data['学历'].value_counts().plot(kind='bar', rot=0)
plt.show()

# 2. Work experience: number of postings per required experience bracket.
plt.figure()
data['年限'].value_counts().plot(kind='bar', rot=0, color='g')
plt.show()

# 3. City: share of postings per city.
plt.rcParams['figure.figsize'] = [15, 15]
city_counts = data['城市'].value_counts()
plt.figure()
# `explode` needs exactly one offset per wedge, so size it from the data
# instead of assuming 18 cities.
city_counts.plot(
    kind='pie',
    autopct='%1.2f%%',
    explode=np.linspace(0, 1.5, len(city_counts)),
)
plt.show()

# 4. Company benefits analysis
# (1) Word segmentation
# Concatenate every benefits cell into one string. Non-string cells (the
# original treated float cells as empty -- presumably NaN for blanks) are
# skipped.
benefit_text = ''.join(b for b in data['公司福利'] if isinstance(b, str))

# Register domain terms so jieba keeps each of them as a single token.
for term in ('五险一金', '牛B', '年底双薪', '带薪年假', '股票期权', '定期体检', '节日礼物'):
    jieba.add_word(term)

words = jieba.lcut(benefit_text)
counts = Counter(words)

# Write the 20 most frequent tokens as "word<TAB>count" lines.
# most_common(20) returns fewer entries when there are fewer distinct tokens,
# so a small data set no longer raises IndexError.
with open('词频统计', mode='w', encoding='utf-8') as f:
    f.writelines(f'{word}\t{count}\n' for word, count in counts.most_common(20))

# (2) Word cloud
# Parse the "word<TAB>count" lines back into a frequency table. Passing the
# raw file text to WordCloud.generate() would re-tokenise it and drop the
# numeric column, so every word would end up with the same weight.
frequencies = {}
with open('词频统计', mode='r', encoding='utf-8') as f:
    for line in f:
        word, sep, count = line.rstrip('\n').rpartition('\t')
        if not sep or not count.isdigit():
            continue  # malformed line (e.g. a token that itself contained a newline)
        # Keep only word-like tokens of two or more characters, matching what
        # WordCloud's default tokenizer pattern would have kept; this filters
        # out punctuation and single characters produced by segmentation.
        if re.fullmatch(r"\w[\w']+", word) and not word.isdigit():
            frequencies[word] = int(count)

wc = WordCloud(
    font_path=r'C:\Users\2020\Desktop\simhei.ttf',
    background_color='white',
    width=1000,
    max_words=100,
    height=860,
    margin=2,
).generate_from_frequencies(frequencies)

# Draw on a fresh figure so the cloud does not land on a previous chart's axes.
plt.figure()
plt.imshow(wc)
plt.axis('off')
plt.show()

# 5. Nationwide salary levels
# Pair each posting's city with the lower bound of its salary range: the text
# before the first 'k'/'K' is parsed as a number of thousands and scaled by
# 1000. float() replaces eval() so spreadsheet text is never executed as code.
data2 = [
    (city, float(re.split('k|K', salary)[0]) * 1000)
    for city, salary in zip(data['城市'], data['工资'])
]

# Column 0 is the city, column 1 the salary lower bound. (The original passed
# an undefined name `index` here, which raised NameError.)
data3 = pd.DataFrame(data2)

# Mean lower bound per city, computed once instead of once per element.
city_mean = data3.groupby(0)[1].mean()
data4 = list(zip(city_mean.index, city_mean.values))

geo = Geo(
    '全国python工资布局',
    '制作人:止疼',
    title_color='#fff',
    title_pos='left',
    width=1200,
    height=600,
    background_color='#404a59',
)
attr, value = geo.cast(data4)
# NOTE(review): visual_range=[0, 300] looks small next to values that were
# multiplied by 1000 above -- confirm the intended colour scale.
geo.add(
    '',
    attr,
    value,
    type='heatmap',
    is_visualmap=True,
    maptype='china',
    visual_range=[0, 300],
    visual_text_color='#fff',
)
geo.render()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值