网上学习资料一大堆,但如果学到的知识不成体系,遇到问题时只是浅尝辄止,不再深入研究,那么很难做到真正的技术提升。
一个人可以走得很快,但一群人才能走得更远!不论你是正从事IT行业的老鸟,还是对IT行业感兴趣的新人,都欢迎加入我们的圈子(技术交流、学习资源、职场吐槽、大厂内推、面试辅导),让我们一起学习成长!
'''公司类型'''
company_type = li.find_element(By.CSS_SELECTOR,
'#wrap > div.page-job-wrapper > div.page-job-inner > '
'div > div.job-list-wrapper > div.search-job-result > '
'ul > li > div.job-card-body.clearfix > div > '
'div.company-info > ul > li:nth-child(1)').text
'''公司规模'''
company_people = li.find_element(By.CSS_SELECTOR,
'#wrap > div.page-job-wrapper > div.page-job-inner '
'> div > div.job-list-wrapper > '
'div.search-job-result > ul > li> '
'div.job-card-body.clearfix > div > '
'div.company-info > ul > li:last-child').text
'''获取薪资水平'''
money = li.find_element(By.CSS_SELECTOR, '.job-card-wrapper .job-card-left .salary').text
'''经验'''
experience = li.find_element(By.CSS_SELECTOR,
'.job-card-wrapper .job-card-left .tag-list :first-child').text
experience = str(experience)
'''获取学历要求'''
education = li.find_element(By.CSS_SELECTOR,
'.job-card-wrapper .job-card-left .tag-list li+li').text
if '月' in education:
education = '本科'
else:
education = education.strip('\n')
'''技能要求'''
skill_list = li.find_elements(By.CSS_SELECTOR,
'#wrap > div.page-job-wrapper > div.page-job-inner > '
'div > div.job-list-wrapper > div.search-job-result > '
'ul > li > div.job-card-footer.clearfix > ul > li')
skill = []
for skill_i in skill_list:
skill_i_text = skill_i.text
if len(skill_i_text) == 0:
continue
skill.append(skill_i_text)
skill = str(skill)
'''福利待遇'''
benefit = li.find_element(By.CSS_SELECTOR, '.job-card-wrapper .info-desc').text
try:
'''岗位描述'''
li.find_element(By.CSS_SELECTOR,
"#wrap > div.page-job-wrapper > div.page-job-inner > div > "
"div.job-list-wrapper > div.search-job-result > ul > li > "
"div.job-card-body.clearfix > a").click()
except ElementClickInterceptedException:
print("正在关闭弹窗")
browser.find_element(By.CSS_SELECTOR, ".boss-login-dialog-content .boss-login-dialog-header .boss-login-close").click()
print("关闭成功")
li.find_element(By.CSS_SELECTOR,
"#wrap > div.page-job-wrapper > div.page-job-inner > div > "
"div.job-list-wrapper > div.search-job-result > ul > li > "
"div.job-card-body.clearfix > a").click()
# browser.execute_script('$(".login-dialog-wrapper").css("display","none")')
# 找到详情页url并打开
time.sleep(5)
# 将窗口移动到最后一个标签页
browser.switch_to.window(browser.window_handles[-1])
job_details = browser.find_element(By.XPATH, '//*[@id="main"]/div[3]/div/div[2]/div[1]/div[2]').text
# job_details = browser.find_element_by_xpath('//*[@id="main"]/div[3]/div/div[2]/div[1]/div[2]').text
# print(job_details)
time.sleep(1)
# 关闭详情页
browser.close()
browser.switch_to.window(browser.window_handles[-1])
'''打印输出'''
print(
address + ',' + job_name + ',' + company + ',' + company_type + ',' + company_people + ','
+ money + ',' + experience + ',' + education + ',' + skill + ',' + benefit + ',' + job_details.replace(
"\n", ""))
with open('数据1.csv', 'a+', newline='', encoding='utf-8-sig') as csvfile:
fieldnames = ['address', 'job_name', 'company_name', 'company_type', 'company_people', 'salary',
'experience', 'education', 'skills', 'benefits', 'job_desc']
writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
writer.writerow({'address': address, 'job_name': job_name, 'company_name': company,
'company_type': company_type, 'company_people': company_people, 'salary': money,
'experience': experience, 'education': education, 'skills': skill,
'benefits': benefit,
'job_desc': job_details.replace("\n", "")
})
time.sleep(10)
except UnicodeEncodeError:
continue
time.sleep(10)
'''利用滑块,使得页面得以跳动,模拟人工'''
js = 'window.scrollTo(0,2000)'
browser.execute_script(js) # 读不懂就对了,这是js代码,滑动滑块的
time.sleep(3)
browser.find_element(By.CSS_SELECTOR, "#wrap > div.page-job-wrapper > div.page-job-inner > div > "
"div.job-list-wrapper > div.search-job-result > div > div > div > "
"a:last-child").click()
time.sleep(8)
else:
print('没有内容,停止运行')
break
if __name__ == '__main__':
    # Script entry point: for every search keyword, crawl the job listings of
    # every configured city by handing the search URL to getData().
    #
    # Keywords that were used in earlier runs and are currently disabled
    # (data development, data analysis, ETL, data warehouse, ETL engineer):
    #   "数据开发", "数据分析", "ETL", "数据仓库", "ETL工程师"
    job_name = ["数据挖掘"]
    for job in job_name:
        # City codes passed as the `city` query parameter.
        # The original note lists these cities: Beijing, Shanghai, Guangzhou,
        # Shenzhen, Hangzhou, Tianjin, Xi'an, Suzhou, Wuhan, Xiamen, Changsha,
        # Chengdu, Zhengzhou, Chongqing -- presumably in the same order as the
        # codes (disabled ones first); verify before relying on the mapping.
        # Disabled codes:
        #   "101010100", "101020100", "101280100", "101280600",
        #   "101210100", "101030100", "101110100", "101190400"
        place = ["101200100", "101230200", "101250100", "101270100", "101180100", "101040100"]
        print("开始爬取" + str(job) + "的岗位信息")
        # `i` is the 1-based count of cities finished for the current keyword.
        for i, p in enumerate(place, start=1):
            job_url = "https://www.zhipin.com/web/geek/job?query=" + job + "&city=" + p
            getData(job_url)
            print(str(job) + "的第" + str(i) + "所城市爬取完成")
        print(str(job) + "岗位爬取完成")
结果展示:
![](https://img-blog.csdnimg.cn/4e85c1b0b39a472c872cadf8debc3293.png)
## 2.数据分析
package Job.DataProcess
import org.apache.spark.sql.SparkSession
import org.apache.spark.sql.functions.{col, desc, round}
/**
 * 数据开发岗位
 */
object DataDev {
def main(args: Array[String]): Unit = {val spark = SparkSession.builder()
.master(“local[4]”)
.appName(“Test”)
.getOrCreate()val data = spark.read
.option(“header”, value = true)
.option(“delimiter”, “,”)
.option(“inferSchema”, value = true)
.csv(“file:\D:\桌面文件\毕设\数据\招聘数据.csv”)/**
- 各行业用人需求占比
*/
data.distinct()
.select(“company_type”)
.filter(col(“job_name”).like(“%数据开发%”))
.filter(col(“company_type”).=!=(“”))
.groupBy(“company_type”)
.count()
.orderBy(desc(“count”))
.limit(10)
.show()
/**
- 经验需求和薪资分析
*/
val experienceCount = data
.distinct()
.select(“experience”)
.filter(col(“job_name”).like(“%数据开发%”))
.filter(col(“experience”).=!=(“”))
.groupBy(“experience”)
.count()
val experienceSalary = data
.distinct()
.filter(col(“job_name”).like(“%数据开发%”))
.filter(col(“experience”).=!=(“”))
.groupBy(“experience”)
.agg(“salary” -> “avg”)
.select(col(“experience”), round(col(“avg(salary)”)) as “avg_salary”)
experienceCount.join(experienceSalary, “experience”)
.orderBy(desc(“count”))
.limit(6).show()
/**
- 学历需求和薪资分析
*/
val educationCount = data
.distinct()
.select(“education”)
.filter(col(“job_name”).like(“%数据开发%”))
.filter(col(“education”).=!=(“”))
.groupBy(“education”)
.count()
val educationSalary = data
.distinct()
.filter(col(“job_name”).like(“%数据开发%”))
.filter(col(“education”).=!=(“”))
- 各行业用人需求占比
既有适合小白学习的零基础资料,也有适合3年以上经验的小伙伴深入学习提升的进阶课程,涵盖了95%以上大数据知识点,真正体系化!
由于文件比较多,这里只是将部分目录截图出来,全套包含大厂面经、学习笔记、源码讲义、实战项目、大纲路线、讲解视频,并且后续会持续更新
既有适合小白学习的零基础资料,也有适合3年以上经验的小伙伴深入学习提升的进阶课程,涵盖了95%以上大数据知识点,真正体系化!
由于文件比较多,这里只是将部分目录截图出来,全套包含大厂面经、学习笔记、源码讲义、实战项目、大纲路线、讲解视频,并且后续会持续更新