from time import sleep

import pymysql
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from selenium.webdriver.common.by import By
# Scrape job listings from 51job (qcwy) search-result pages with Selenium and
# store one row per job card in the MySQL table `qcwy_job`.

# Connection settings passed to pymysql.connect().
# NOTE(review): the credentials are hard-coded in source; consider loading
# them from the environment or a config file kept out of version control.
DB_CONFIG = {
    'host': 'localhost',
    'user': 'root',
    'password': '010830',
    'db': 'huawei',
}

# NOTE(review): the second column is named `data` although it stores the
# posting date. It is left unchanged because `create table if not exists`
# would not rename the column of an already-existing table.
CREATE_TABLE_SQL = 'create table if not exists qcwy_job(job varchar(255),data varchar(255),sal varchar(255),address varchar(255),exp varchar(255),edu varchar(255),num varchar(255),company_name varchar(255))'
INSERT_SQL = "insert into qcwy_job values(%s,%s,%s,%s,%s,%s,%s,%s)"

# Search-result URL; `{}` receives the page number. The original literal
# contained `°reefrom` (an HTML-entity-mangled `&degreefrom`), which broke the
# query string; it is restored here.
SEARCH_URL = (
    'https://search.51job.com/list/040000,000000,0000,00,9,99,'
    '%25E6%2595%25B0%25E6%258D%25AE%25E5%2588%2586%25E6%259E%2590%25E5%25B8%2588'
    ',2,{}.html?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99'
    '&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare='
)

# XPath selecting every job card on a result page.
JOB_CARD_XPATH = '/html/body/div[2]/div[3]/div/div[2]/div[4]/div[1]/div'

# Result pages to crawl (inclusive).
FIRST_PAGE = 1
LAST_PAGE = 5

# User-Agent spoofing header.
# NOTE(review): nothing in this script passes this dict to Selenium, so the
# browser still sends its own User-Agent. Kept so the name stays available.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/99.0.4844.74 Safari/537.36'
}


def parse_job_card(text):
    """Turn the visible text of one job card into a row for `qcwy_job`.

    The text is split on newlines. Line 0 is the job title, line 1 the
    posting date, line 2 the salary, and the third line from the end the
    company name. A line containing ``|`` supplies address (first part),
    experience (second part), education (second-to-last part) and head
    count (last part).

    Args:
        text: The ``.text`` of a job-card element.

    Returns:
        An 8-tuple ``(job, date, sal, address, exp, edu, num, company_name)``,
        or ``None`` when the card has fewer than three lines and therefore
        cannot supply the title, date and salary.
    """
    info = text.split('\n')
    if len(info) < 3:
        # Placeholder or ad cards would otherwise raise IndexError and abort
        # the whole crawl.
        return None

    job, date, sal = info[0], info[1], info[2]

    # Reset per card so a card without a '|' line neither raises NameError
    # nor silently inherits the previous card's values.
    address = exp = edu = num = ''
    for line in info:
        if '|' in line:
            # NOTE(review): as in the original loop, when several lines
            # contain '|' the last one wins - confirm that is intended.
            parts = line.split('|')
            address = parts[0]
            exp = parts[1]
            edu = parts[-2]
            num = parts[-1]

    company_name = info[-3]
    return (job, date, sal, address, exp, edu, num, company_name)


def main():
    """Crawl the configured result pages and insert every parsed job card.

    Rows are committed after each page so an error on a later page does not
    discard what was already scraped. The browser, cursor and connection are
    always released, even when the crawl fails part-way.
    """
    conn = pymysql.connect(**DB_CONFIG)
    try:
        cursor = conn.cursor()
        try:
            cursor.execute(CREATE_TABLE_SQL)

            # Hide the "controlled by automated software" switch to reduce
            # bot detection.
            option = ChromeOptions()
            option.add_experimental_option('excludeSwitches', ['enable-automation'])
            # `options=` replaces the deprecated `chrome_options=` keyword,
            # which newer Selenium 4 releases no longer accept.
            bro = webdriver.Chrome(options=option)
            try:
                bro.maximize_window()
                sleep(3)
                for page in range(FIRST_PAGE, LAST_PAGE + 1):
                    bro.get(SEARCH_URL.format(page))
                    # Give the dynamically rendered result list time to load.
                    sleep(10)
                    cards = bro.find_elements(By.XPATH, JOB_CARD_XPATH)
                    parsed = (parse_job_card(card.text) for card in cards)
                    rows = [row for row in parsed if row is not None]
                    if rows:
                        # One row per card; the original passed each row
                        # twice and so stored every job in duplicate.
                        cursor.executemany(INSERT_SQL, rows)
                        conn.commit()
                    # Pause between pages to keep the request rate low.
                    sleep(10)
            finally:
                bro.quit()
        finally:
            cursor.close()
    finally:
        conn.close()


if __name__ == '__main__':
    main()
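# NOTE: the two lines below are non-code text that was pasted in after the
# script (apparently web-page residue); kept as comments so the file parses.
# 招聘_qcwy  ("recruitment_qcwy")
# 最新推荐文章于 2024-07-29 14:37:54 发布  ("latest recommended article published 2024-07-29 14:37:54")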