记一次对某招聘网站的数据爬取并可视化

本文记录了一次使用Python爬虫抓取某招聘网站数据的过程,并利用pyecharts库进行数据可视化,包括生成饼状图等。尽管pyecharts在某些地方存在不足,如横坐标显示问题,但最终仍成功实现了图表展示。
摘要由CSDN通过智能技术生成

先单纯记一下代码,有时间再补充细节

pyecharts有不少地方还不完善,比如横坐标标签较长时会显示不全(即使旋转标签也只能部分缓解)。

import requests
import pandas as pd
import re
import csv
import parsel
from pyecharts.charts import Bar
from pyecharts import options as opts

def CN(m):
    """Return the midpoint of a salary range string such as ``'1万-2万'``.

    The text is split on ``'-'`` and the first number found in each of the
    first two parts is averaged. Decimal bounds (``'1.5万'``) are read in
    full, and a string with a single figure and no ``'-'`` yields that
    figure.

    NOTE: unit suffixes (千/万/元 ...) are not interpreted, so the result is
    expressed in whatever unit the text itself uses and a range whose two
    bounds use different units is not normalized.

    Args:
        m: Salary text.

    Returns:
        The mean of the lower and upper bound, as a float.

    Raises:
        ValueError: If one of the bounds contains no number.
    """
    # Raw string avoids the invalid "\d" escape; the optional group keeps
    # the fractional part of values like "1.5".
    number = re.compile(r'\d+(?:\.\d+)?')
    bounds = []
    for part in m.split('-')[:2]:
        found = number.search(part)
        if found is None:
            raise ValueError('no numeric salary bound in %r' % (m,))
        bounds.append(float(found.group()))
    return sum(bounds) / len(bounds)

# Search endpoint only: the query string is supplied through `params`.
# Keeping it in the URL as well makes requests send every parameter twice.
url = 'https://sou.zhaopin.com/'
params = {
    'jl': '530',
    'kw': 'Python',
    'p': '1',
}
# Request headers copied from a browser session.
# NOTE(review): the cookie is a captured, time-limited session cookie; it
# will need refreshing and should not live in shared source — consider
# loading it from the environment.
# NOTE(review): 'br' is advertised in accept-encoding; decoding a Brotli
# response presumably requires the optional brotli package — confirm.
headers = {
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'zh-CN,zh;q=0.9',
    'cache-control': 'no-cache',
    # One logical cookie header, written as two adjacent literals so the
    # string no longer breaks across physical lines (a syntax error).
    'cookie': (
        'FSSBBIl1UgzbN7N443S=LNt9ZAuMwG9MUBjrzulq5Fu_KHAJNmxKJGkYJtyx0elo_Pb8aUNQkjH2SZEyIUiL; urlfrom2=121113803; adfbid2=0; x-zp-client-id=e5a0f51c-c5a5-46ce-b283-d1e397772a62; sts_deviceid=1793bb6a85154c-0529b265a81fba-d7e1739-1327104-1793bb6a8524ce; ZP_OLD_FLAG=false; selectCity_search=530; ssxmod_itna2=iqmO7KYve+hDk7DXKT9wDGrqUgCD0oqAKiEdG9io8DBwhxmD7PI2jOFIFGF7Ig1Y5AFBladTrGVYCAyNC0WzlDhEVjZCFn5HvccY9xzo0rdtuhLxzg8XCix7jmDFqG7OeD==; locationInfo_search={%22code%22:%222406%22%2C%22name%22:%22%E9%95%BF%E6%B2%99%E5%8E%BF%22%2C%22message%22:%22%E5%8C%B9%E9%85%8D%E5%88%B0%E5%B8%82%E7%BA%A7%E7%BC%96%E7%A0%81%22}; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%221082192836%22%2C%22first_id%22%3A%221793bb6a866766-0fc03a939627ee-d7e1739-1327104-1793bb6a86742a%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%2C%22%24latest_utm_source%22%3A%22baidupcpz%22%2C%22%24latest_utm_medium%22%3A%22cpt%22%7D%2C%22%24device_id%22%3A%221793bb6a866766-0fc03a939627ee-d7e1739-1327104-1793bb6a86742a%22%7D; ssxmod_itna=QqjxcD07wxgnKGHD8D2eLeGQY=BKm3LomxQQD/fmDnqD=GFDK40EYBoDCi+BnbRgxa4TL3G35hofFiDp5=8ih7SoDHxY=DU=iqpYD4+KGwD0eG+DD4DWDmmFDnxAQDjxGpnXvTs=DEDmb8DGeDep97DY5DhxDCUGPDwx0CE6D4ErYFY35m0=DhaWr8D7vmDlpxIzowvI34BrBvh4TWDB45cIr6TB2e=l53wx0k4q0OnoHz8xO6jEH1Vcub8QrelmreqG+KaD+K=mutgm1qOWRq+YZDYQhtLy0oHk6DD=; x-zp-device-id=a385ebb86b2bd11dbf04ba6b7f48841d; rt=88da4ca385684b4eaa6593f697696311; at=095cbce67fa54e44b0a17460f5789427; acw_tc=276082a116203945630905699e689e1a2729f210aa0696089752acf7e49c97; d4d6cd0b4a19fa72b8cc377185129bb7=9f249f62-0f4d-4301-abc3-660cb202675d; zpfe_probe_token=84253590sbfe9c4bdf858714a6e1ae568985; Hm_lvt_38ba284938d5eddca645bb5e02a02006=1620204516,1620264939,1620367905,1620394570; Hm_lpvt_38ba284938d5eddca645bb5e02a02006=1620394570; '
        'FSSBBIl1UgzbN7N443T=5jTa70lZw7LR3VU0Dhe9TWg.6Gfkrelfgrrgnp233zHtJllBIm1fVSafpCxZ2mNz5GVuWd6kaKYV_SzIke9eHbr8vAKSHaCmQwvFjiP9sJNsO5Tt7LWGcNtdL.WHliMHixaJyPLY.SutqTNiSeB1heROUyCPD8vOLmQ6Hh9sIb9xvsTCDBMc896EyB3yPOEdbn.I2kWWNo8HmfZBTLCxMhM_iYsN2lwIKigdrUmIcBnDlClVUV6ExmkYQvW4KkpdrRb0A9oT8X.LiALq.EbNtwt5ky1JdYop.p4WgY.XVh.ERLZUUVyNmA0.rCDry8xB2U47ojree2uPb4BysE5H8o2LNurNj..cu07vrCtSxNGvKgFVmGGrOq9rsYsfzaEY8xDq'
    ),
    'pragma': 'no-cache',
    'sec-ch-ua': '" Not A;Brand";v="99", "Chromium";v="90", "Google Chrome";v="90"',
    'sec-ch-ua-mobile': '?0',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'none',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.93 Safari/537.36',
}
# A timeout keeps the script from hanging forever on a stalled connection.
res = requests.get(url, headers=headers, params=params, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page.
res.raise_for_status()
r = res.text
selector = parsel.Selector(r)
# Job titles are taken from the title attribute of each listing's name span.
job = selector.css('.positionlist .iteminfo__line1__jobname span::attr(title)').getall()

# Salary text nodes, with surrounding whitespace removed.
salary = [
    text.strip()
    for text in selector.css('.positionlist .iteminfo__line2__jobdesc .iteminfo__line2__jobdesc__salary::text').getall()
]

dataframe = pd.DataFrame({"职位": job, "薪资": salary})
dataframe.to_csv(r'./zhilian.csv')  # write the CSV table

# Replace each salary string with its numeric midpoint for plotting.
salary = [CN(text) for text in salary]
print(len(salary))
# Bar chart of salary midpoints per job title, built one call at a time.
bar = Bar(init_opts=opts.InitOpts(width="1500px", height="750px"))
bar.add_xaxis(job)
bar.add_yaxis('薪资', salary)
x_axis_opts = opts.AxisOpts(
    name='岗位',
    name_rotate=60,
    # Tilt the x-axis labels by 45 degrees, otherwise some are not fully
    # shown; even so, part of the text still gets dropped.
    axislabel_opts=opts.LabelOpts(rotate=45),
)
bar.set_global_opts(
    title_opts=opts.TitleOpts(title='智联招聘行情图'),
    xaxis_opts=x_axis_opts,
)
bar.render()

运行后效果如图:
在这里插入图片描述
在这里插入图片描述

生成饼状图

from pyecharts.charts import Pie
# Pair each job title with its salary midpoint: [[title, value], ...].
data = [list(z) for z in zip(job, salary)]
pie = Pie().add('智联招聘行情图', data_pair=data)
# Write the pie to its own file: render() with no path uses the default
# render.html, which is the file the bar chart was already written to, so
# the bar chart would be overwritten.
pie.render('pie.html')

生成效果如图:
在这里插入图片描述

  • 3
    点赞
  • 13
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值