# Scrape job postings from Lagou (拉勾网) with Selenium
# encoding:utf-8
from selenium import webdriver
from lxml import etree
from pyquery import PyQuery as pq
# 引入显式等待
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import time
class LagouSpider(object):
driver_path = r'D:\chromedriver\chromedriver.exe'
def __init__(self):
    """Start a Chrome WebDriver session and initialise the crawl state.

    Sets three instance attributes:

    * ``driver``    -- Chrome WebDriver created from the class-level
      ``driver_path`` (path to the chromedriver executable).
    * ``url``       -- the Lagou job-list URL that the crawl starts from.
    * ``positions`` -- a list that starts out empty.
    """
    # NOTE(review): ``executable_path`` is deprecated in Selenium 4 --
    # confirm the installed Selenium version still accepts this keyword.
    chromedriver_location = self.driver_path
    self.driver = webdriver.Chrome(executable_path=chromedriver_location)

    # Entry page of the crawl.
    start_page = 'https://www.lagou.com/jobs/list_python?labelWords=&fromSearch=true&suginput='
    self.url = start_page

    self.positions = list()
def run(self):
'''
程序开始函数
:return:
'''
self.driver.get(self.url)
while True:
source = self.driver.page_source
self.parse_list_page(source)
# 显示等待,直到指定的元素被加载成功,若没有加载成功超过指定的时间