from selenium.webdriver.common.by import By
from selenium import webdriver
from time import sleep
from openpyxl import load_workbook
# Open the workbook (the file must already exist at this path).
work_xlsx = load_workbook('E:\\awork\\project\\爬虫\\work.xlsx')
# Get the active worksheet.
sheet = work_xlsx.active
# Write the header row: column A is the company name, columns B..K are
# up to ten position columns (work1 .. work10).  A single loop over
# sheet.cell(row, column, value) replaces the original eleven
# hand-written 'a1'..'k1' assignments and produces the same cells.
sheet.cell(row=1, column=1, value='company')
for col in range(2, 12):
    sheet.cell(row=1, column=col, value=f'work{col - 1}')
# Launch a Chrome browser session.
driver = webdriver.Chrome()
# Implicit wait: every find_element call retries for up to 10 seconds
# before raising, so freshly loaded pages have time to render.
driver.implicitly_wait(10)
# Open the job-fair student-query page of the target site.
driver.get("https://jy.sicnu.edu.cn/home/studentQuery?id=8")
driver.maximize_window()
# Read and print the text of the <b> element inside #btn11
# (presumably the page/fair title — confirm against the live page).
name1 = driver.find_element(By.XPATH, '//*[@id="btn11"]/b').text
print(name1)
# --- Scrape every company row and the positions on its detail page ---
# Fixes over the original: removed the never-used lst2; renamed the
# inner-loop variable so it no longer shadows name1 from the page-title
# lookup above; wrapped the loop in try/finally so the browser process
# is always released (the original leaked the WebDriver).
# Container element holding one <tr> per exhibiting company.
father = driver.find_element(By.ID, 'sxh-cydw')
sleep(1)
els = father.find_elements(By.CSS_SELECTOR, "tr")
lst1 = []  # accumulates one sheet row: [company, work1, work2, ...]
try:
    for ele in els:
        # The second <td> of the row holds the company name.
        company = ele.find_element(By.CSS_SELECTOR, 'td + td').text
        lst1.append(company)
        sleep(1)
        # Click the "view details" link; it opens a new browser window.
        ele.find_element(By.PARTIAL_LINK_TEXT, '查看').click()
        sleep(1)
        window_handles = driver.window_handles  # all open windows
        driver.switch_to.window(window_handles[1])  # switch to detail window
        sleep(1)
        # Table body listing this company's open positions.
        # NOTE(review): absolute XPath — brittle if the page layout changes.
        father1 = driver.find_element(
            By.XPATH,
            '/html/body/div[4]/div[4]/div/div[1]/div[2]/div[2]/table/tbody')
        sleep(1)
        # Each position title sits in a <div> inside the second <td>.
        for pos in father1.find_elements(By.CSS_SELECTOR, 'tr'):
            lst1.append(pos.find_element(By.CSS_SELECTOR, 'td + td > div').text)
        print(lst1)
        sheet.append(lst1)  # write the row (company + its positions)
        lst1.clear()        # reset the accumulator for the next company
        driver.close()      # close the detail window
        driver.switch_to.window(window_handles[0])  # back to the listing
finally:
    driver.quit()  # release the browser even if scraping fails mid-loop
work_xlsx.save('E:\\awork\\project\\爬虫\\work.xlsx')  # persist the workbook
print('数据写入成功!')
# Exercise: scrape job-fair companies and their posted positions.
# (Blog-footer residue from the pasted article: latest recommended
# article published 2024-06-14 09:50:01 — kept as a comment so the
# file remains valid Python.)