# Open the index page, then keep scrolling down in small steps until the
# document height stops increasing, and finally hand the page source to
# self.get_data().
self.browser.get(self.INDEX_URL)
# Read-only height query: nothing is assigned on the page side, so no global
# variable is created in the page's JavaScript context.
js = "return document.body.scrollHeight"
height = 0
new_height = self.browser.execute_script(js)
while height < new_height:
    # Move the scrollbar from the previous bottom to the current bottom in
    # 100px steps, pausing briefly between steps.
    for offset in range(height, new_height, 100):
        self.browser.execute_script('window.scrollTo(0, {})'.format(offset))
        time.sleep(0.05)
    height = new_height
    # Wait before re-measuring -- presumably so content appended by the page
    # after scrolling has time to extend the document.
    time.sleep(1)
    new_height = self.browser.execute_script(js)
html = self.browser.page_source
self.get_data(html)
当页面有多个下拉框时,window.scrollTo 不再适用,需要改用“先定位目标元素,再下拉到该元素”的方式:
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from loguru import logger
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.keys import Keys
import random
import re
from urllib.parse import quote
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.common.by import By
# Open a Baidu results page, wait for a target element, then scroll the window
# to it step by step using injected JavaScript.
# NOTE(review): `executable_path` is deprecated in Selenium 4 (and, per the
# Selenium changelog, removed in later 4.x releases); when upgrading, pass a
# `Service(chrome_path)` object through `service=` instead -- confirm against
# the installed Selenium version.
chrome_path = r'F:\PythonProject\data-auto\ProjectSetting\chromedriver.exe'
driver = webdriver.Chrome(executable_path=chrome_path)
driver.get("https://www.baidu.com/s?wd=data&rsv_spt=1&rsv_iqid=0x96412e19001eb086&issp=1&f=8&rsv_bp=1&rsv_idx=2&ie=utf-8&tn=baiduhome_pg&rsv_enter=1&rsv_dl=tb&rsv_sug3=3&rsv_sug1=1&rsv_sug7=100&rsv_sug2=0&rsv_btype=i&inputT=956&rsv_sug4=1595")

# Locate the target element (wait up to 20s, polling every 0.01s).
wait = WebDriverWait(driver, 20, 0.01)
target_element = wait.until(EC.presence_of_element_located((
    By.XPATH, '//div[@class="cr-content new-pmd "]'
)))
# Y coordinate of the element as reported by WebDriver. The injected script
# measures the element itself, so this value is not passed to it; the name is
# kept in case code elsewhere in the file reads it.
scroll_position = target_element.location["y"]

# JavaScript that moves the window towards the element in fixed-size steps
# with a pause between steps. It works in both directions (target below or
# above the current scroll offset). The steps are scheduled with setTimeout,
# so execute_script() returns before the scrolling has finished.
scroll_script = """
function smoothScrollTo(element) {
    const startY = window.pageYOffset;
    const targetY = element.getBoundingClientRect().top + window.pageYOffset;
    const diff = targetY - startY;
    const direction = Math.sign(diff);  // 1 = down, -1 = up, 0 = already there
    const distance = Math.abs(diff);
    const scrollStep = 50;  // pixels moved per step
    const pauseTime = 50;   // milliseconds between steps
    let travelled = 0;
    function scroll() {
        if (travelled < distance) {
            // Clamp the last step so the window never overshoots the target.
            travelled = Math.min(travelled + scrollStep, distance);
            window.scrollTo(0, startY + direction * travelled);
            setTimeout(scroll, pauseTime);
        }
    }
    scroll();
}
// Run the scroll towards the element passed in from Python.
smoothScrollTo(arguments[0]);
"""
driver.execute_script(scroll_script, target_element)
如果上面的实现仍然不成功,可以使用终极方法:
直接定位所有的 li 元素,然后循环逐个下拉(滚动)到每个元素
# Fallback approach: wait for all matching <li> elements to be present, then
# scroll each one into view in turn.
# NOTE(review): this snippet uses `browser`, while `wait` earlier in the file
# is built on `driver` -- confirm both refer to the same WebDriver session.
links = wait.until(
    EC.presence_of_all_elements_located((
        By.XPATH, '//li[@class="artdeco-list__item pl3 pv3 "]'
    ))
)
for link in links:
    browser.execute_script("arguments[0].scrollIntoView({behavior: 'smooth'});", link)
    # Brief pause before moving on to the next item.
    time.sleep(0.04)