有关 openpyxl 的使用请看:《Python 中使用 openpyxl 操作 Excel 的常用方法及案例》。
wenhaha 的文章目录
1.直接上代码,然后咱们一步一步优化
import requests
from re import findall
from json import loads
import time
import os
import openpyxl
def get_one_page(page, city_code='000000'):
    """Fetch one page of 51job search results for the keyword "数据分析".

    The search page embeds its results as a JSON object assigned to
    ``window.__SEARCH_RESULT__`` inside a ``<script>`` tag; that object is
    extracted with a regular expression and decoded.

    Args:
        page: Page number substituted into the search URL.
        city_code: Area code substituted into the search URL. The default
            '000000' presumably means "all regions" -- confirm against the
            site.

    Returns:
        The ``engine_search_result`` value of the embedded JSON, or ``None``
        when the response status is not 200 or the embedded JSON cannot be
        found in the page.

    Raises:
        requests.RequestException: On network errors, including the timeout.
        ValueError: If the embedded payload is not valid JSON.
        KeyError: If the payload has no ``engine_search_result`` key.
    """
    headers = {
        'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.212 Safari/537.36'
    }
    # The pasted source had "°reefrom": the "&deg" of "&degreefrom" had been
    # decoded as an HTML entity. The parameter name is restored here.
    url = (
        f'https://search.51job.com/list/{city_code},000000,0000,00,9,99,数据分析,2,{page}.html'
        '?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99'
        '&companysize=99&ord_field=0&dibiaoid=0&line=&welfare='
    )
    # Without a timeout a stalled connection would block the crawl forever.
    response = requests.get(url, headers=headers, timeout=10)
    if response.status_code != 200:
        print('请求失败!')
        return None

    matches = findall(r'window.__SEARCH_RESULT__\s*=\s*(\{.+?\})</script>', response.text)
    if not matches:
        # A 200 response without the embedded JSON: report it instead of
        # failing with a bare IndexError.
        print('解析失败: 页面中未找到搜索结果!')
        return None
    return loads(matches[0])['engine_search_result']
def get_all_data(max_pages=10):
    """Crawl search-result pages in order and store each one in the workbook.

    Pages are requested starting from page 1. The loop stops early as soon
    as a page yields no result (``get_one_page`` returned something falsy).

    Args:
        max_pages: Maximum number of pages to request. Defaults to 10, the
            page count that was previously hard-coded.
    """
    for page in range(1, max_pages + 1):
        result = get_one_page(page)
        if not result:
            print('没有更多数据')
            break
        # Write the page to the Excel file right away, so pages already
        # fetched are kept even if a later request fails.
        save_page_data(result)
        print(f'获取第{page}页数据成功!')
        # Pause between requests (presumably to avoid hammering the site).
        time.sleep(1)
def get_work_book():
    """Open or create the results workbook and return it with its data sheet.

    Loads 'files/招聘信息.xlsx' when that path exists; otherwise a new
    workbook is created in memory. The sheet named '数据分析' is reused when
    present. When it has to be created, the header titles are written into
    row 1 of the new sheet.

    Returns:
        A ``(workbook, worksheet)`` tuple.
    """
    workbook_path = 'files/招聘信息.xlsx'
    sheet_name = '数据分析'

    # Step 1: load the file from disk if it is there, else start fresh.
    workbook = (
        openpyxl.load_workbook(workbook_path)
        if os.path.exists(workbook_path)
        else openpyxl.Workbook()
    )

    # Step 2: reuse the data sheet when it already exists; headers are only
    # written for a newly created sheet.
    if sheet_name in workbook.sheetnames:
        return workbook, workbook[sheet_name]

    worksheet = workbook.create_sheet(sheet_name)
    headers = ['岗位名称', '薪资', '公司名称', '公司性质', '公司地址', '要求', '福利']
    for column, header in enumerate(headers, start=1):
        worksheet.cell(1, column).value = header
    return workbook, worksheet
def save_page_data(data: list):
    """Append one page of job records to the worksheet and save the workbook.

    NOTE(review): this function reads the module-level names ``sheet`` and
    ``wb``, which are not defined inside it; presumably the script's entry
    point binds them from ``get_work_book()`` -- confirm.

    Args:
        data: Job records as dicts. Each record is read with ``.get`` for
            the keys ``job_name``, ``providesalary_text``, ``company_name``,
            ``companytype_text``, ``workarea_text``, ``attribute_text`` and
            ``jobwelf``; missing keys become empty cells (or dashes for
            ``attribute_text``).
    """
    output_path = 'files/招聘信息.xlsx'
    missing_attributes = ['-', '-', '-', '-', '-']

    # Continue below whatever is already in the sheet.
    row = sheet.max_row + 1
    for job in data:
        # Treat an explicit null like a missing key so the join cannot fail.
        attributes = job.get('attribute_text')
        if attributes is None:
            attributes = missing_attributes
        # Column order: job title, salary, company name, company type,
        # work location, requirements, benefits.
        job_info = [
            job.get('job_name', ''),
            job.get('providesalary_text', ''),
            job.get('company_name', ''),
            job.get('companytype_text', ''),
            job.get('workarea_text', ''),
            '/'.join(attributes),
            job.get('jobwelf', ''),
        ]
        for col, value in enumerate(job_info, start=1):
            sheet.cell(row, col).value = value
        row += 1

    # Saving fails if the target directory does not exist yet.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    wb.save(output_path)