selenium爬取拉勾网数据

用selenium做UI自动化测试时,闲来无事,就尝试用它模拟用户操作来爬取招聘信息:检索“测试工程师”的招聘结果。

'''selenium爬虫拉钩测试的招聘信息'''
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options

# Run Chrome headless so the scrape does not need a visible browser window.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
# Pass the options via `options=`: the older `chrome_options=` keyword was
# deprecated by Selenium and is rejected by current releases.
driver = webdriver.Chrome(options=chrome_options)
# For a visible browser while debugging, build the driver with
# webdriver.Chrome() and no options instead.
# Element lookups poll for up to 15 seconds before raising.
driver.implicitly_wait(15)
def located():
    """Search Lagou for test-engineer jobs and print every result page.

    Opens the Lagou home page, clicks the Chengdu site link, submits the
    search keyword, reads the page count from the pager and then calls
    next_page() once per page.

    The shared module-level driver is always shut down when this function
    finishes, including when one of the steps raises, so a failed run does
    not leave a browser process behind.
    """
    try:
        driver.get('https://www.lagou.com')
        driver.find_element(By.XPATH, "//a[text()='成都站']").click()
        search_box = driver.find_element(By.XPATH, '//input[@id="search_input"]')
        search_box.send_keys('测试工程师')
        driver.find_element(By.XPATH, "//input[@id='search_button']").click()
        # The text of the pager's second span is used as the number of pages.
        pager_total = driver.find_element(
            By.XPATH, "//div[@class='page-number']/span[2]").text
        page_count = int(pager_total)
        for page in range(1, page_count + 1):
            print('第' + str(page) + '页')
            next_page()
    finally:
        driver.quit()
def next_page():
    """Print the postings on the current result page, then go to the next one.

    Calls save_message() for data-index values 0 through 13 and afterwards
    clicks the "next page" control.

    Raises:
        Exception: Whatever save_message() raises on its second attempt for
            a posting, or whatever the final click raises.
    """
    for index in range(0, 14):
        row_index = str(index)  # save_message() concatenates this into XPaths
        try:
            save_message(row_index)
        except Exception:
            # Retry once; presumably this rides out a page that is still
            # rendering -- TODO confirm. Only ordinary errors are retried, so
            # Ctrl-C and interpreter exit are no longer swallowed here.
            save_message(row_index)
    driver.find_element(By.XPATH, "//span[text()='下一页']").click()
def save_message(i):
    """Print the fields of the posting whose <li> has data-index equal to i.

    Args:
        i: The data-index value as a string; it is concatenated into the
            XPath expressions used for each lookup.

    The fields are looked up one at a time through the shared driver and
    printed as a single list.
    """
    row = "//li[@data-index='" + i + "']"

    def text_at(suffix):
        # Visible text of the element found at `suffix` below this row.
        return driver.find_element(By.XPATH, row + suffix).text

    title = text_at("/div[1]/div[1]/div[1]/a/h3")
    position = text_at("/div[1]/div[1]/div[1]/a/span/em")
    company = text_at("/div[1]/div[2]/div[1]/a")
    company_role = text_at("/div[1]/div[2]/div[2]")
    company_link = driver.find_element(
        By.XPATH, row + "/div[1]/div[2]/div[1]/a").get_attribute('href')
    # The salary is read from an attribute on the <li> itself, not from text.
    salary = driver.find_element(By.XPATH, row).get_attribute('data-salary')
    requirements = text_at("/div[1]/div[1]/div[2]/div")
    remarks = text_at("/div[2]/div[2]")
    print([title, salary, requirements, position,
           company, remarks, company_role, company_link])
# Start the scrape only when this file is executed as a script.
if __name__ == '__main__':
    located()
  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值