一、表的结构
id | int |
title | varchar(255) |
area | varchar(255) |
salary | varchar(255) |
edu_list | varchar(255) |
company_name | varchar(255) |
company_tag_list | varchar(255) |
info_desc | varchar(255) |
href | varchar(500) |
二、代码
import time
from selenium import webdriver
from selenium.webdriver.common.by import By
import pymysql
from urllib import parse
# Job keyword typed by the user; con() and the save class also use it as the
# name of the MySQL table.
KeyWord = input("请输入你要搜索的岗位关键字:")
# Percent-encode the keyword twice: the second pass re-encodes the '%' signs
# produced by the first pass.
KeyWords = parse.quote(KeyWord)
KeyWords = parse.quote(KeyWords)
def con():
    """Create the MySQL table that stores the scraped jobs, if it is missing.

    The table is named after the module-level ``KeyWord``. The function opens
    its own connection (port 3306, user ``root``; no host is passed, so the
    PyMySQL default applies), runs the DDL statement, and always closes the
    connection again, even when the statement fails.

    Raises:
        pymysql.MySQLError: if connecting or executing the statement fails.
    """
    # Identifiers cannot be sent as bound query parameters, so the table name
    # is quoted by hand: wrap it in backticks and double any embedded backtick.
    # This keeps keywords with spaces or reserved words working and stops the
    # user-typed text from being interpreted as SQL.
    table = '`{}`'.format(KeyWord.replace('`', '``'))
    # IF NOT EXISTS lets the script be re-run with the same keyword.
    sql = (
        'CREATE TABLE IF NOT EXISTS {}('
        '`id` int(11) NOT NULL AUTO_INCREMENT,'
        '`title` VARCHAR(255) DEFAULT NULL,'
        '`area` VARCHAR(255) DEFAULT NULL,'
        '`salary` VARCHAR(255) DEFAULT NULL,'
        '`edu_list` VARCHAR(255) DEFAULT NULL,'
        '`company_name` VARCHAR(255) DEFAULT NULL,'
        '`company_tag_list` VARCHAR(255) DEFAULT NULL,'
        '`info_desc` VARCHAR(255) DEFAULT NULL,'
        '`href` VARCHAR(500) DEFAULT NULL,'
        'PRIMARY KEY (`id`)) ENGINE=InnoDB DEFAULT CHARSET=utf8;'
    ).format(table)
    db = pymysql.connect(
        port=3306,
        user='root',
        password='输入密码',
        db='输入数据库名',
        charset='utf8'
    )
    try:
        with db.cursor() as cursor:
            cursor.execute(sql)
    finally:
        # Release the connection even if the statement raised.
        db.close()
class save:
    """Namespace holding the shared DB connection, cursor and INSERT statement.

    The class body runs once, when the class statement is executed: it calls
    ``con()`` to create the table, then opens the connection that
    ``get_job_info`` uses through ``save.db``, ``save.cursor`` and
    ``save.sql_1``. The class is never instantiated in this file.
    """

    # Make sure the target table exists before any row is inserted.
    con()
    # Connection shared by every insert; the caller commits on it.
    db = pymysql.connect(
        port=3306,
        user='root',
        password='输入密码',
        db='输入数据库名',
        charset='utf8'
    )
    cursor = db.cursor()
    # Parameterized INSERT; the eight %s placeholders are filled by
    # cursor.execute(). The table name cannot be a bound parameter, so it is
    # backtick-quoted by hand (embedded backticks doubled). A literal '%' in
    # the name is doubled too, because PyMySQL applies %-interpolation to the
    # whole statement when parameters are supplied.
    # The former bare try/except around this assignment was removed: building
    # the string cannot fail on bad row data, and had the handler ever run it
    # would have left sql_1 undefined.
    sql_1 = (
        'insert into `{}`(title,area,salary,edu_list,company_name,'
        'company_tag_list,info_desc,href) '
        'values(%s, %s, %s, %s, %s, %s, %s, %s)'
    ).format(KeyWord.replace('`', '``').replace('%', '%%'))
def get_job_info():
    """Scrape every job card on the current page and insert it into MySQL.

    Reads the module-level ``driver`` (the Selenium browser) and writes through
    ``save.cursor`` / ``save.db``. Each card found under ``.job-card-wrapper``
    becomes one row, committed immediately so that rows already stored survive
    a later failure. A card that lacks any of the expected elements is skipped
    instead of aborting the rest of the page.
    """
    # Imported locally so this function brings its own dependency into scope.
    from selenium.common.exceptions import NoSuchElementException

    # CSS selectors whose text fills the columns, in the column order of
    # save.sql_1: title, area, salary, edu_list, company_name,
    # company_tag_list, info_desc. The href column is read separately below.
    text_selectors = (
        '.job-name ',
        '.job-area',
        '.salary',
        '.tag-list',
        '.company-name',
        '.company-tag-list',
        '.info-desc',
    )
    cards = driver.find_elements(by=By.CSS_SELECTOR, value='.job-card-wrapper')
    for card in cards:
        try:
            row = [
                card.find_element(by=By.CSS_SELECTOR, value=selector).text
                for selector in text_selectors
            ]
            # Link to the job's detail page.
            row.append(
                card.find_element(by=By.CSS_SELECTOR, value='.job-card-left').get_attribute('href')
            )
        except NoSuchElementException:
            # One malformed card must not cost the remaining cards on the page.
            print('跳过一条字段不完整的职位数据')
            continue
        save.cursor.execute(save.sql_1, tuple(row))
        save.db.commit()
if __name__ == '__main__':
    # Script entry point: open Chrome, walk through up to 10 result pages,
    # store every page's job cards, then release the browser and the database.
    driver = webdriver.Chrome()
    # Wait up to 10 seconds for elements to appear before a lookup gives up.
    driver.implicitly_wait(10)
    try:
        # Load the listing once. Reloading the same URL on every iteration
        # would undo the "next page" click and scrape the first page each time.
        driver.get('xxxxxxxxxxxxxxxxxxxxxx')  # listing page URL
        for page in range(1, 11):
            print(f'正在获取{page}页的数据')
            # Short pause so the freshly loaded page can render.
            time.sleep(1)
            try:
                get_job_info()
            except Exception as exc:
                # Best effort: report the failed page and keep paging.
                print(f'第{page}页数据获取失败: {exc!r}')
            try:
                # find_elements returns an empty list when nothing matches,
                # whereas find_element raises, so the "no data" branch is
                # actually reachable.
                next_links = driver.find_elements(by=By.CSS_SELECTOR,
                                                  value='.options-pages a:nth-child(10)')
                if not next_links:
                    print("没数据")
                    break
                next_links[0].click()
            except Exception as exc:
                # Without a successful page turn the next iteration would only
                # re-scrape the same page, so stop here.
                print(f'翻页失败: {exc!r}')
                break
    finally:
        # Always release the browser and the shared database handles.
        driver.quit()
        save.cursor.close()
        save.db.close()