用 Selenium 做 UI 自动化测试时,闲来无事,就尝试用它模拟用户操作来爬取招聘信息:检索“测试工程师”的招聘结果。
'''Scrape test-engineer job listings from Lagou (lagou.com) with Selenium.'''
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
# Configure Chrome to run without a visible window.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
# Pass the options through the `options=` keyword. The older `chrome_options=`
# keyword was deprecated and later removed in Selenium 4, where it raises
# TypeError.
driver = webdriver.Chrome(options=chrome_options)
# Debugging tip: build the driver with webdriver.Chrome() (no options) to
# watch the browser while the script runs.
# Each element lookup waits up to 15 seconds for the element to appear.
driver.implicitly_wait(15)
def located():
    """Search Lagou for '测试工程师' on the Chengdu site and print every page.

    Opens the Lagou home page, picks the '成都站' city link, submits the
    search, reads the total page count from the pager and then calls
    ``next_page()`` once per page. The module-level ``driver`` is always
    shut down when the function exits, including when a step raises, so a
    failed run does not leave a browser process behind.

    Raises:
        ValueError: if the pager text cannot be converted to an integer.
        Any exception raised by the WebDriver calls or by ``next_page()``
        propagates after the browser has been closed.
    """
    try:
        driver.get('https://www.lagou.com')
        driver.find_element(By.XPATH, "//a[text()='成都站']").click()
        driver.find_element(
            By.XPATH, '//input[@id="search_input"]'
        ).send_keys('测试工程师')
        driver.find_element(By.XPATH, "//input[@id='search_button']").click()

        # The second <span> of the pager holds the total number of pages.
        pager_text = driver.find_element(
            By.XPATH, "//div[@class='page-number']/span[2]"
        ).text
        last_page = int(pager_text)

        for page_number in range(1, last_page + 1):
            print('第' + str(page_number) + '页')
            next_page()
    finally:
        # Close the browser even if scraping failed part-way through.
        driver.quit()
def next_page():
    """Print the listings on the current results page, then click '下一页'.

    Calls ``save_message`` for list indices 0 through 13. A failed read is
    retried once; if the retry also fails, its exception propagates to the
    caller.
    """
    # NOTE(review): only indices 0-13 are read; confirm this matches the
    # number of listings the site renders per page.
    for index in range(0, 14):
        item_index = str(index)
        try:
            save_message(item_index)
        except Exception:
            # Retry once (presumably to ride out a transient lookup failure
            # while the page is still updating). Catching Exception rather
            # than using a bare except keeps Ctrl-C and SystemExit working.
            save_message(item_index)
    driver.find_element(By.XPATH, "//span[text()='下一页']").click()
def save_message(i):
    """Read one job listing from the current page and print its fields.

    Args:
        i: the listing's ``data-index`` attribute value, as a string.

    Prints a list in the order: job title, salary, requirements, the text
    of the title's ``span/em`` element, company name, remarks, company
    description, company URL.
    """
    # Every lookup is anchored on the same <li>, so build the prefix once.
    item = "//li[@data-index='" + i + "']"

    u_role = driver.find_element(By.XPATH, item + "/div[1]/div[1]/div[1]/a/h3").text
    # NOTE(review): the span/em text looks like the job's location - confirm.
    position = driver.find_element(By.XPATH, item + "/div[1]/div[1]/div[1]/a/span/em").text

    # The company link supplies both the name and the URL; locate it once
    # instead of running the same XPath twice.
    company_link = driver.find_element(By.XPATH, item + "/div[1]/div[2]/div[1]/a")
    company = company_link.text
    company_role = driver.find_element(By.XPATH, item + "/div[1]/div[2]/div[2]").text
    company_url = company_link.get_attribute('href')

    # The salary is stored as an attribute on the <li> itself.
    money = driver.find_element(By.XPATH, item).get_attribute('data-salary')
    u_require = driver.find_element(By.XPATH, item + "/div[1]/div[1]/div[2]/div").text
    remarkers = driver.find_element(By.XPATH, item + "/div[2]/div[2]").text

    a_message = [
        u_role,
        money,
        u_require,
        position,
        company,
        remarkers,
        company_role,
        company_url,
    ]
    print(a_message)
# Run the scraper only when this file is executed as a script.
if "__main__" == __name__:
    located()