```python
import random
import time

from lxml import etree
from selenium import webdriver
from selenium.common.exceptions import WebDriverException
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait
class QiChe(object):
    """Selenium-driven scraper for the Lagou job listing stored in ``self.url``.

    ``run`` walks the listing page by page; every posting linked from a page
    is opened in a separate browser window, its description paragraphs are
    printed, and the extracted fields are appended to ``self.list``.
    """

    # NOTE: instantiated when the class body executes, so a Chrome window is
    # launched as soon as this module is imported and every instance shares
    # it. Kept as a class attribute so existing ``QiChe.driver`` users work.
    driver = webdriver.Chrome()

    # Class name of the "next page" control. A class-name locator must be a
    # single class token, so it carries no surrounding whitespace.
    _NEXT_BUTTON_CLASS = 'pager_next'

    def __init__(self):
        self.url = 'https://www.lagou.com/jobs/list_python%E5%B7%A5%E7%A8%8B%E5%B8%88/p-city_215?px=default#filterBox'
        # One dict per scraped posting, in visiting order (see parse_url).
        self.list = []

    def run(self):
        """Open the listing and scrape every page until pagination fails.

        Raises:
            selenium.common.exceptions.TimeoutException: if the ``body-btn``
                element does not appear within the wait timeout.

        Errors raised while scraping a page propagate to the caller instead
        of being reported as "last page".
        """
        self.driver.get(self.url)
        wait = WebDriverWait(self.driver, 100)
        # Presumably an overlay/popup button that has to be clicked before
        # the listing is usable -- TODO confirm against the live page.
        entry_button = wait.until(
            EC.presence_of_element_located((By.CLASS_NAME, 'body-btn'))
        )
        entry_button.click()
        time.sleep(2)
        while True:
            self.details_page()
            try:
                next_button = wait.until(
                    EC.presence_of_element_located(
                        (By.CLASS_NAME, self._NEXT_BUTTON_CLASS)
                    )
                )
                next_button.click()
            except WebDriverException:
                # Any WebDriver failure while paging (button missing, not
                # clickable, ...) is treated as the end of the listing.
                # KeyboardInterrupt / SystemExit are no longer swallowed.
                print('最后一页了')
                break
            # Randomised pause between listing pages.
            time.sleep(random.randint(5, 10))

    def details_page(self):
        """Scrape every posting linked from the currently loaded listing page.

        Each record returned by ``parse_url`` is appended to ``self.list``.
        """
        listing = etree.HTML(self.driver.page_source)
        for href in listing.xpath('//div[@class="p_top"]/a/@href'):
            self.list.append(self.parse_url(href))
            # Randomised pause between detail pages.
            time.sleep(random.randint(3, 6))

    def parse_url(self, style):
        """Open one posting in a new window, print its description, return it.

        Args:
            style: URL of the posting's detail page.

        Returns:
            dict with the raw XPath results (lists of strings) under the keys
            ``company_names``, ``job_names``, ``salary_dresss`` and
            ``job_contents``.

        Raises:
            IndexError: if ``window.open`` did not produce a new window.
        """
        listing_window = self.driver.current_window_handle
        known_handles = set(self.driver.window_handles)
        # Pass the URL as a script argument instead of formatting it into the
        # JavaScript source, so quotes in the URL cannot break the script.
        self.driver.execute_script("window.open(arguments[0])", style)
        opened = [
            handle for handle in self.driver.window_handles
            if handle not in known_handles
        ]
        if not opened:
            raise IndexError(
                'window.open did not create a new window for %s' % style
            )
        self.driver.switch_to.window(opened[0])
        try:
            # Give the detail page time to load before reading its source.
            time.sleep(random.randint(3, 7))
            detail = etree.HTML(self.driver.page_source)
            record = {
                'company_names': detail.xpath(
                    '//div[@class="job-name"]/h4[@class="company"]/text()'
                ),
                'job_names': detail.xpath(
                    '//div[@class="job-name"]//h1[@class="name"]/text()'
                ),
                'salary_dresss': detail.xpath(
                    '//dd[@class="job_request"]//span/text()'
                ),
                'job_contents': detail.xpath(
                    '//div[@class="job-detail"]/p/text()'
                ),
            }
            for job_content in record['job_contents']:
                print(job_content)
            print('-' * 50)
            return record
        finally:
            # Always close the detail window and return focus to the listing,
            # even if parsing failed, so the next iteration starts clean.
            self.driver.close()
            self.driver.switch_to.window(listing_window)
if __name__ == '__main__':
    # Script entry point: scrape the listing, then always shut the browser
    # down so the Chrome/chromedriver processes are not left running.
    qiche = QiChe()
    try:
        qiche.run()
    finally:
        qiche.driver.quit()