爬取boss直聘上的“python爬虫”的岗位信息,在点击下一页的内容时出现如下错误selenium.common.exceptions.WebDriverException: Message: unknown error: Element <a href="/c100010000/?query=python%E7%88%AC%E8%99%AB&page=2" ka="page-next" class="next"></a> is not clickable at point (582, 546). Other element would receive the click: <div class="job-primary">...</div>
nextbtn.click() 出现错误,无法正常点击按钮;改用 nextbtn.send_keys("\n") 模拟点击按钮后即可正常使用。
代码如下
from selenium import webdriver
import random
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from lxml import etree
import re
import json
# Single shared browser instance used by Boss_Spider below.
# NOTE(review): the original comment claimed a proxy was added to the browser,
# but Chrome() is constructed with no options here — confirm.
driver = webdriver.Chrome()
class Boss_Spider(object):
    """Crawl "python爬虫" job listings from zhipin.com (Boss直聘).

    Walks the paginated search-result pages, opens each job's detail page
    in a new browser tab, extracts the job fields and appends them as JSON
    records to ``boss1.txt``.
    """

    def __init__(self):
        # Reuse the module-level browser instance.
        self.driver = driver
        self.url = 'https://www.zhipin.com/c100010000/?query=python%E7%88%AC%E8%99%AB&page=1&ka=page-1'
        self.i = 0  # kept for backward compatibility (unused)

    def run(self):
        """Drive pagination: parse every results page until 'next' is disabled."""
        self.driver.get(self.url)
        while True:
            # Wait for the pagination control before reading the page source.
            WebDriverWait(self.driver, timeout=50).until(
                EC.presence_of_element_located((By.CLASS_NAME, "next")))
            self.parse_source(self.driver.page_source)
            # find_element_by_class_name was removed in Selenium 4.
            nextbtn = self.driver.find_element(By.CLASS_NAME, 'next')
            if "next_disabled" in nextbtn.get_attribute("class"):
                break
            # nextbtn.click() fails with "element not clickable: other element
            # would receive the click" because a result card overlaps the link;
            # sending ENTER to the anchor activates it instead.
            nextbtn.send_keys("\n")

    def parse_source(self, source):
        """Extract each job's detail-page URL from a results page and crawl it.

        :param source: HTML of one search-results page.
        """
        html = etree.HTML(source)
        # renamed from 'list', which shadowed the builtin
        for li in html.xpath('//div[@class = "job-list"]/ul/li'):
            href = li.xpath('.//div/div[1]/h3/a/@href')[0]
            # @href is site-relative ("/job_detail/..."); lstrip avoids the
            # double slash the original concatenation produced.
            self.parse_detail('https://www.zhipin.com/' + href.lstrip('/'))

    @staticmethod
    def _clean(parts):
        """Collapse a list of xpath text fragments into one comma-joined string.

        Strips all whitespace inside each fragment and drops empty fragments.
        Replaces the original str(list) + re.sub cleanup, whose character
        class ["\\s"|"\\n"|"\\\\n"] wrongly deleted every letter 'n'.
        """
        return ','.join(re.sub(r'\s+', '', p) for p in parts if p.strip())

    def parse_detail(self, newurl):
        """Open *newurl* in a new tab, scrape the job fields, append to boss1.txt.

        :param newurl: absolute URL of one job's detail page.
        """
        self.driver.execute_script("window.open('%s')" % newurl)
        # switch_to_window was removed in Selenium 4; switch_to.window replaces it.
        self.driver.switch_to.window(self.driver.window_handles[1])
        WebDriverWait(self.driver, timeout=50).until(
            EC.presence_of_element_located((By.XPATH, '//div[@class="info-primary"]/p')))
        page = etree.HTML(self.driver.page_source)
        # info-primary <p> text nodes: city / experience / education (in order).
        info = page.xpath('//div[@class="info-primary"]/p/text()')
        item = {
            '薪水': self._clean(page.xpath('//span[@class="salary"]/text()')),
            '职位名称': page.xpath('//*[@id="main"]/div[1]/div/div/div[2]/div[2]/h1/text()')[0],
            '工作城市': info[0],
            '工作经验': info[1],
            '教育背景': info[2],
            '职位描述': self._clean(
                page.xpath('//*[@id="main"]/div[3]/div/div[2]/div[2]/div[1]/div/text()')[1:]),
            '公司简介': self._clean(page.xpath('//div[@class = "job-tags"]/span/text()')),
            '工作地点': page.xpath('//div[@class = "job-location"]/div/text()')[0],
        }
        # Context manager guarantees the handle is closed even if dump raises;
        # the original open()/close() pair leaked the handle on error.
        with open('boss1.txt', 'a', encoding='utf8') as fp:
            json.dump(item, fp, indent=4, ensure_ascii=False)
            fp.write('\n')  # separate consecutive JSON records
        self.driver.close()
        self.driver.switch_to.window(self.driver.window_handles[0])
if __name__ == '__main__':
    # Entry point: crawl all result pages when run as a script.
    boss_spider = Boss_Spider()
    boss_spider.run()
文章最后附上我解决该错误时参考的博客链接:
https://blog.csdn.net/fengqiaoxian/article/details/81104173