"""
Developer:Gladys
Objective:zhipin.com
Tool:Selenium
"""
import re
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import random
import csv
# options = webdriver.ChromeOptions()
# options.add_argument("--proxy-server=代理IP")
# CSV column headers, in output order. These are runtime strings that also
# serve as the row-dict keys, so they must stay exactly as written.
CSV_FIELDS = [
    '职位',
    '薪资',
    '工作城市',
    '工作经验',
    '学历',
    '公司名称',
    '技能',
]

# Number of result pages scraped per city.
MAX_PAGES = 10

# City codes substituted into the search URL.
city_list = [101210100]


def job_info():
    """Scrape every job card on the currently loaded result page.

    Uses the module-level ``driver`` to locate the cards and writes one CSV
    row per card through the module-level ``csvwriter``. Each record is also
    printed, fields separated by ``|``.
    """
    # Fixed pause before querying the page for job cards.
    time.sleep(5)
    cards = driver.find_elements(By.CSS_SELECTOR, '.job-card-body')
    footers = driver.find_elements(By.CSS_SELECTOR, '.job-card-footer')

    for position, card in enumerate(cards):
        job_name = card.find_element(By.CLASS_NAME, 'job-name').text

        # Keep only the text before the first '·' in the salary label.
        salary = card.find_element(By.CLASS_NAME, 'salary').text.split('·')[0]

        job_area = card.find_element(By.CLASS_NAME, 'job-area').text

        # Every tag except the last one is recorded as "experience".
        tags = card.find_elements(By.XPATH, './/ul[@class="tag-list"]/li')
        experience_str = ",".join(str(tag.text) for tag in tags[:-1])

        # Education is taken as the final two characters of the tag list text.
        education = card.find_element(By.CLASS_NAME, 'tag-list').text[-2:]

        company_name = card.find_element(By.CLASS_NAME, 'company-name').text

        # Footers are matched to cards by position. If the page yields fewer
        # footers than cards, record an empty skill list instead of crashing.
        if position < len(footers):
            skill_items = footers[position].find_elements(
                By.XPATH, './/ul[@class="tag-list"]/li'
            )
        else:
            skill_items = []
        # Drop tags whose text is empty.
        skill = [text for text in (item.text for item in skill_items) if text]

        print(job_name, salary, job_area, experience_str, education,
              company_name, skill, sep='|')

        # NOTE(review): 'skill' is written as a Python list, so the CSV cell
        # holds its repr (e.g. "['a', 'b']"). Kept as-is for compatibility
        # with whatever consumes the file.
        row = {
            '职位': job_name,
            '薪资': salary,
            '工作城市': job_area,
            '工作经验': experience_str,
            '学历': education,
            '公司名称': company_name,
            '技能': skill,
        }
        csvwriter.writerow(row)


with open('b.csv', 'a+', encoding='UTF-8-SIG', newline='') as csv_file:
    csvwriter = csv.DictWriter(csv_file, fieldnames=CSV_FIELDS)
    # The file is opened in append mode, so the stream starts at the end of
    # any existing content. Only write the header when the file is empty;
    # otherwise every run would insert another header row mid-file.
    if csv_file.tell() == 0:
        csvwriter.writeheader()

    driver = webdriver.Chrome()
    try:
        # Element lookups retry for up to 10 seconds before failing.
        driver.implicitly_wait(10)

        for city in city_list:
            driver.get(
                f'https://www.zhipin.com/c{city}/?query=java&ka=sel-city-{city}'
            )
            # Randomized pause after loading the search page. The implicit
            # wait above is a lookup timeout, not a delay, so sleep here.
            time.sleep(random.randint(5, 8))
            # Scroll to the bottom of the page.
            driver.execute_script(
                'document.documentElement.scrollTop = '
                'document.documentElement.scrollHeight'
            )

            for page in range(1, MAX_PAGES + 1):
                job_info()
                if page == MAX_PAGES:
                    # Last page scraped; no need to navigate further.
                    break
                # find_elements returns an empty list instead of raising, so
                # a missing "next" arrow ends this city's crawl cleanly.
                next_buttons = driver.find_elements(
                    By.CLASS_NAME, 'ui-icon-arrow-right'
                )
                if not next_buttons:
                    break
                next_buttons[0].click()
                time.sleep(3)
    finally:
        # Always shut the browser down, even if scraping raised.
        driver.quit()
# Hadoop in practice --- Zhipin (zhipin.com) crawler code
# (Source page banner: latest recommended article published 2023-11-30 09:20:37)