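# Many people who are new to scraping with Selenium run into a
# StaleElementReferenceException when reading an element.
# I read many CSDN articles and none of them solved my problem, so here is one
# approach that worked for me.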
import time
from selenium import webdriver#selenium库需要环境配置
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import StaleElementReferenceException
import pandas as pd
# Page that renders the industry research-report table with JavaScript.
_REPORT_URL = "https://data.eastmoney.com/report/industry.jshtml"

# XPath of every cell in one column of the report table; `{}` is the 1-based
# <td> position inside each row.
_CELL_XPATH = (
    "//div[@id='industry_table']/table[@class='table-model']/tbody//tr//td[{}]"
)

# (result key, <td> position) for each scraped column, in read order.
_COLUMNS = (
    ("date", 10),
    ("industry", 9),
    ("rating", 6),
    ("rating_change", 7),
)


def _read_column(driver, column_index, timeout=10, attempts=3):
    """Return the text of every cell in one table column.

    Waits (up to ``timeout`` seconds) until at least one matching cell is
    present instead of sleeping for a fixed time, and re-locates the cells
    when the page re-renders between locating them and reading their text.

    Args:
        driver: The Selenium WebDriver showing the report page.
        column_index: 1-based ``<td>`` position of the column to read.
        timeout: Seconds to wait for the cells to appear.
        attempts: How many times to locate-and-read before giving up on a
            stale reference.

    Returns:
        A list with the text of each cell, or an empty list when no cell
        appeared within ``timeout``.

    Raises:
        StaleElementReferenceException: If the cells were still stale on the
            last attempt.
    """
    # Local import: the file's import block does not bring this name in.
    from selenium.common.exceptions import TimeoutException

    xpath = _CELL_XPATH.format(column_index)
    for attempt in range(1, attempts + 1):
        try:
            cells = WebDriverWait(driver, timeout).until(
                EC.presence_of_all_elements_located((By.XPATH, xpath))
            )
            # Reading .text is where a stale reference surfaces, so it must
            # stay inside the try block.
            return [cell.text for cell in cells]
        except TimeoutException:
            # Mirrors find_elements(), which yields nothing when no cell
            # matches rather than failing.
            return []
        except StaleElementReferenceException:
            # The table was re-rendered under us; locate the cells again.
            if attempt == attempts:
                raise
    return []


def get_data():
    """Scrape date, industry, rating and rating change from the report table.

    Opens Chrome, loads the industry report page, reads the four columns and
    always closes the browser afterwards.

    Returns:
        A dict with the keys ``"date"``, ``"industry"``, ``"rating"`` and
        ``"rating_change"``, each mapping to the list of cell texts collected
        for that column.
    """
    options = webdriver.ChromeOptions()
    driver = webdriver.Chrome(options=options)
    # The result lists live here; previously they were appended to without
    # ever being defined.
    collected = {name: [] for name, _ in _COLUMNS}
    try:
        driver.get(_REPORT_URL)  # dynamic page
        # NOTE(review): three passes are kept from the original, but nothing
        # navigates to another page between them, so each pass re-reads the
        # same rows. Presumably a "next page" step belongs here -- confirm.
        for _ in range(3):
            for name, column_index in _COLUMNS:
                collected[name].extend(_read_column(driver, column_index))
    finally:
        # Release the browser process even when scraping fails.
        driver.quit()
    return collected
# Explanation: when Selenium loads a page's HTML, for example:
driver = webdriver.Chrome()
driver.get("https://data.eastmoney.com/report/industry.jshtml")
# or when it locates elements, for example:
elements = driver.find_elements(By.XPATH, "//div[@id='industry_table']/table[@class='table-model']/tbody//tr//td[1]")
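# the browser needs some time to load the page after the request is sent.
# The script runs faster than the browser loads (and re-renders) the page.
# If the script touches an element during that window, the reference it holds
# may no longer be attached to the page, which raises
# StaleElementReferenceException (note: this means the located element was
# replaced, not that it was never found).
# Therefore, add a wait after every driver.get(url) or driver.find_elements()
# call. Setting a delay with time.sleep() resolved the error for me; an
# explicit wait (WebDriverWait) is the more reliable form of the same idea.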