from bs4 import BeautifulSoup as bs
from selenium import webdriver
import time
import csv
def request_data(first_page=1, last_page=171, delay=1.0):
    """Fetch 51job search-result pages for the query "数据分析" with Selenium.

    Parameters
    ----------
    first_page : int
        First results page to fetch (default 1, matching the original run).
    last_page : int
        Last results page to fetch, inclusive (default 171, matching the
        original hard-coded ``range(1, 172)``).
    delay : float
        Seconds to wait after each page load so the page can render
        before the HTML is captured (default 1.0).

    Returns
    -------
    list[str]
        The raw ``page_source`` HTML of every fetched page, in order.

    NOTE(review): the browser window is deliberately left open afterwards
    (``detach`` option) and is never quit here.
    """
    # Keep the browser window open after the script finishes.
    option = webdriver.ChromeOptions()
    option.add_experimental_option("detach", True)
    browser = webdriver.Chrome(options=option)

    data_all = []
    for i in range(first_page, last_page + 1):
        browser.get(f'https://search.51job.com/list/090200,000000,0000,00,9,99,%25E6%2595%25B0%25E6%258D%25AE%25E5%2588%2586%25E6%259E%2590,2,{i}.html')
        # Crude render wait; the result list is injected by JavaScript.
        time.sleep(delay)
        data_all.append(browser.page_source)
    return data_all
def analysis_data(data):
    """Parse job postings out of raw 51job search-result HTML pages.

    Parameters
    ----------
    data : list[str]
        Raw HTML of each results page (as returned by ``request_data``).

    Returns
    -------
    list[list[list[str]]]
        One list per page; each inner entry is one posting:
        [title, publish time, company, salary, requirements,
         company scale, business scope, detail link].
        Missing fields come back as '' instead of crashing.
    """
    def _text(node, selector):
        # None-safe field extraction: sponsored/ad cards can lack fields,
        # and .get_text() (unlike the original .string) also works on
        # elements that contain child tags, where .string returns None.
        found = node.select_one(selector)
        return found.get_text(strip=True) if found is not None else ''

    finally_data = []
    for page_html in data:
        html = bs(page_html, 'lxml')
        all_info = []
        for item in html.select('.j_joblist>.e'):
            # Job title
            post = _text(item, '.t>.jname.at')
            # Publish time
            announce_time = _text(item, '.t>.time')
            # Company name
            company = _text(item, '.cname.at')
            # Salary
            wages = _text(item, '.sal')
            # Requirements (location / experience / education)
            ask = _text(item, '.d.at')
            # Company scale
            scale = _text(item, '.dc.at')
            # Business scope
            business = _text(item, '.int.at')
            # Detail-page link; guard both the element and the attribute
            # so one malformed card cannot abort the whole run.
            link_node = item.select_one('.el')
            postlink = link_node.attrs.get('href', '') if link_node is not None else ''
            all_info.append([post, announce_time, company, wages,
                             ask, scale, business, postlink])
        finally_data.append(all_info)
    return finally_data
if __name__ == '__main__':
    # Scrape every results page, then parse each page into rows.
    parsed_pages = analysis_data(request_data())
    header = ['招聘岗位', '发布时间', '公司名称', '工资',
              '招聘条件', '公司规模', '经营范围', '职位连接']
    with open('./51job.csv', 'w', newline='', encoding='utf-8') as csv_file:
        out = csv.writer(csv_file)
        out.writerow(header)
        # Flatten the per-page grouping into a single stream of rows.
        out.writerows(row for page in parsed_pages for row in page)