所以解析到的原始数据如下:
先来看看python的薪酬榜:
看一下西安的排位,薪资平均真的好低…
import requests
from bs4 import BeautifulSoup
import csv
import random
import time
import argparse
from pyecharts.charts import Line
import pandas as pd
class BossCrawler:
    """Crawl zhipin.com job postings for one search keyword.

    Constructing the crawler downloads the site's hot-city list. Calling
    ``get_url`` then walks every city, requests up to ten result pages per
    city, and collects the postings whose title contains the keyword in
    ``boss_info_list`` (each one is also printed).
    """

    # Seconds to wait for a single HTTP response before giving up, so one
    # stalled connection cannot hang the whole crawl.
    REQUEST_TIMEOUT = 10

    # Pool of User-Agent strings; one is picked at random for every request.
    USER_AGENTS = (
        "Opera/9.80 (X11; Linux i686; Ubuntu/14.10) Presto/2.12.388 Version/12.16",
        "Opera/9.80 (Windows NT 6.0) Presto/2.12.388 Version/12.14",
        "Mozilla/5.0 (Windows NT 6.0; rv:2.0) Gecko/20100101 Firefox/4.0 Opera 12.14",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.0) Opera 12.14",
        "Opera/12.80 (Windows NT 5.1; U; en) Presto/2.10.289 Version/12.02",
        "Opera/9.80 (Windows NT 6.1; U; es-ES) Presto/2.9.181 Version/12.00",
        "Opera/9.80 (Windows NT 5.1; U; zh-sg) Presto/2.9.181 Version/12.00",
        "Opera/12.0(Windows NT 5.2;U;en)Presto/22.9.168 Version/12.00",
        "Opera/12.0(Windows NT 5.1;U;en)Presto/22.9.168 Version/12.00",
        "Mozilla/5.0 (Windows NT 5.1) Gecko/20100101 Firefox/14.0 Opera/12.0",
        "Opera/9.80 (Windows NT 6.1; WOW64; U; pt) Presto/2.10.229 Version/11.62",
        "Opera/9.80 (Windows NT 6.0; U; pl) Presto/2.10.229 Version/11.62",
        "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; fr) Presto/2.9.168 Version/11.52",
        "Opera/9.80 (Macintosh; Intel Mac OS X 10.6.8; U; de) Presto/2.9.168 Version/11.52",
        "Opera/9.80 (Windows NT 5.1; U; en) Presto/2.9.168 Version/11.51",
        "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; de) Opera 11.51",
        "Opera/9.80 (X11; Linux x86_64; U; fr) Presto/2.9.168 Version/11.50",
        "Opera/9.80 (X11; Linux i686; U; hu) Presto/2.9.168 Version/11.50",
        "Opera/9.80 (X11; Linux i686; U; ru) Presto/2.8.131 Version/11.11",
        "Opera/9.80 (X11; Linux i686; U; es-ES) Presto/2.8.131 Version/11.11",
        "Mozilla/5.0 (Windows NT 5.1; U; en; rv:1.8.1) Gecko/20061208 Firefox/5.0 Opera 11.11",
        "Opera/9.80 (X11; Linux x86_64; U; bg) Presto/2.8.131 Version/11.10",
        "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.8.99 Version/11.10",
        "Opera/9.80 (Windows NT 5.1; U; zh-tw) Presto/2.8.131 Version/11.10",
        "Opera/9.80 (Windows NT 6.1; Opera Tablet/15165; U; en) Presto/2.8.149 Version/11.1",
        "Opera/9.80 (X11; Linux x86_64; U; Ubuntu/10.10 (maverick); pl) Presto/2.7.62 Version/11.01",
        "Opera/9.80 (X11; Linux i686; U; ja) Presto/2.7.62 Version/11.01",
        "Opera/9.80 (X11; Linux i686; U; fr) Presto/2.7.62 Version/11.01",
        "Opera/9.80 (Windows NT 6.1; U; zh-tw) Presto/2.7.62 Version/11.01",
        "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.7.62 Version/11.01",
        "Opera/9.80 (Windows NT 6.1; U; sv) Presto/2.7.62 Version/11.01",
        "Opera/9.80 (Windows NT 6.1; U; en-US) Presto/2.7.62 Version/11.01",
        "Opera/9.80 (Windows NT 6.1; U; cs) Presto/2.7.62 Version/11.01",
        "Opera/9.80 (Windows NT 6.0; U; pl) Presto/2.7.62 Version/11.01",
        "Opera/9.80 (Windows NT 5.2; U; ru) Presto/2.7.62 Version/11.01",
        "Opera/9.80 (Windows NT 5.1; U;) Presto/2.7.62 Version/11.01",
        "Opera/9.80 (Windows NT 5.1; U; cs) Presto/2.7.62 Version/11.01",
        "Mozilla/5.0 (Windows NT 6.1; U; nl; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01",
        "Mozilla/5.0 (Windows NT 6.1; U; de; rv:1.9.1.6) Gecko/20091201 Firefox/3.5.6 Opera 11.01",
        "Mozilla/4.0 (compatible; MSIE 8.0; Windows NT 6.1; de) Opera 11.01",
        "Opera/9.80 (X11; Linux x86_64; U; pl) Presto/2.7.62 Version/11.00",
        "Opera/9.80 (X11; Linux i686; U; it) Presto/2.7.62 Version/11.00",
        "Opera/9.80 (Windows NT 6.1; U; zh-cn) Presto/2.6.37 Version/11.00",
        "Opera/9.80 (Windows NT 6.1; U; pl) Presto/2.7.62 Version/11.00",
        "Opera/9.80 (Windows NT 6.1; U; ko) Presto/2.7.62 Version/11.00",
        "Opera/9.80 (Windows NT 6.1; U; fi) Presto/2.7.62 Version/11.00",
        "Opera/9.80 (Windows NT 6.1; U; en-GB) Presto/2.7.62 Version/11.00",
        "Opera/9.80 (Windows NT 6.1 x64; U; en) Presto/2.7.62 Version/11.00",
        "Opera/9.80 (Windows NT 6.0; U; en) Presto/2.7.39 Version/11.00",
    )

    def __init__(self, query):
        """Set up a crawl for *query* and fetch the list of city codes.

        Args:
            query: Search keyword; also used to filter job titles and to
                build the CSV file name.
        """
        self.query = query
        self.filename = 'boss_info_%s.csv' % self.query
        self.city_code_list = self.get_city()
        self.boss_info_list = []
        self.csv_header = ["city", "profession", "salary", "company"]

    @staticmethod
    def getheaders():
        """Return request headers carrying a randomly chosen User-Agent."""
        user_agent = random.choice(BossCrawler.USER_AGENTS)
        headers = {'User-Agent': user_agent}
        return headers

    def get_city(self):
        """Download the city list and return the hot-city codes.

        The first entry of ``hotCityList`` is skipped (presumably a
        nationwide pseudo-city -- confirm against the API response).
        """
        headers = self.getheaders()
        r = requests.get(
            "http://www.zhipin.com/wapi/zpCommon/data/city.json",
            headers=headers,
            timeout=self.REQUEST_TIMEOUT,
        )
        data = r.json()
        return [city['code'] for city in data['zpData']['hotCityList'][1:]]

    def get_response(self, url, params=None):
        """Fetch *url* and return the body parsed as a BeautifulSoup tree.

        Args:
            url: Address to request.
            params: Optional query-string parameters passed to ``requests``.
        """
        headers = self.getheaders()
        r = requests.get(
            url, headers=headers, params=params, timeout=self.REQUEST_TIMEOUT
        )
        r.encoding = 'utf-8'
        soup = BeautifulSoup(r.text, "lxml")
        return soup

    def get_url(self):
        """Crawl every city in ``city_code_list``, pausing between cities."""
        for city_code in self.city_code_list:
            url = "https://www.zhipin.com/c%s/" % city_code
            self.per_page_info(url)
            # Wait between cities to keep the request rate low.
            time.sleep(10)

    def per_page_info(self, url):
        """Collect matching postings from result pages 1-10 of one city.

        Stops early at the first page that has no job list. Every kept
        posting is printed and appended to ``boss_info_list`` as a dict
        keyed by ``csv_header``.

        Args:
            url: City-specific search URL.
        """
        for page_num in range(1, 11):
            params = {"query": self.query, "page": page_num}
            soup = self.get_response(url, params)
            job_list = soup.find('div', class_='job-list')
            if job_list is None:
                # The page has no job-list container at all (not a normal
                # results page); treat it like an empty page.
                return
            lines = job_list.select('ul > li')
            if not lines:
                # No more data for this city; move on to the next one.
                return
            for line in lines:
                info_primary = line.find('div', class_="info-primary")
                city = info_primary.find('p').text.split(' ')[0]
                job = info_primary.find('div', class_="job-title").text
                # Filter out postings that do not actually match the query.
                if self.query.lower() not in job.lower():
                    continue
                # Keep the part before the first '-' with 'K' removed
                # (presumably the lower bound of the salary range).
                salary = info_primary.find('span', class_="red").text.split('-')[0].replace('K', '')
                company = line.find('div', class_="info-company").find('a').text.lower()
                result = dict(zip(self.csv_header, [city, job, salary, company]))
                print(result)
                self.boss_info_list.append(result)
自我介绍一下,小编13年上海交大毕业,曾经在小公司待过,也去过华为、OPPO等大厂,18年进入阿里一直到现在。
深知大多数Python工程师,想要提升技能,往往是自己摸索成长或者是报班学习,但培训机构动辄几千的学费,着实压力不小。自己不成体系地自学,效率低、周期长,而且极易碰到天花板,技术停滞不前!
因此收集整理了一份《2024年Python开发全套学习资料》,初衷也很简单,就是希望能够帮助到想自学提升又不知道该从何学起的朋友,同时减轻大家的负担。
既有适合小白学习的零基础资料,也有适合3年以上经验的小伙伴深入学习提升的进阶课程,基本涵盖了95%以上Python开发知识点,真正体系化!
由于文件比较大,这里只是将部分目录大纲截图出来,每个节点里面都包含大厂面经、学习笔记、源码讲义、实战项目、讲解视频,并且后续会持续更新
如果你觉得这些内容对你有帮助,可以添加V获取:vip1024c (备注Python)
一、Python所有方向的学习路线
Python所有方向的技术点做的整理,形成各个领域的知识点汇总,它的用处就在于,你可以按照下面的知识点去找对应的学习资源,保证自己学得较为全面。
二、Python必备开发工具
工具都帮大家整理好了,安装就可直接上手!
三、最新Python学习笔记
当我学到一定基础,有自己的理解能力的时候,会去阅读一些前辈整理的书籍或者手写的笔记资料,这些笔记详细记载了他们对一些技术点的理解,这些理解是比较独到,可以学到不一样的思路。
四、Python视频合集
观看全面零基础学习视频,看视频学习是最快捷也是最有效果的方式,跟着视频中老师的思路,从基础到深入,还是很容易入门的。
五、实战案例
纸上得来终觉浅,要学会跟着视频一起敲,要动手实操,才能将自己的所学运用到实际当中去,这时候可以搞点实战案例来学习。
六、面试宝典
简历模板(原文此处为配图,图片链接已失效)
六、面试宝典