selenium bs4等待动态页面元素加载示例代码(使用Chrome Options)如下:
#!/usr/bin/env python
# -*- coding: utf-8 -*-
# Python 3.9
# @Time : 2023/5/6 4:02
# @Author : 'Lou Zehua'
# @File : selenium_bs4_chrome_option.py
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Request-header style mapping; only the User-Agent entry is defined here.
# The value identifies the client as Edge 112 (Chromium) on 64-bit Windows 10.
header = {
    "User-Agent": (
        "Mozilla/5.0 (Windows NT 10.0; Win64; x64) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/112.0.0.0 Safari/537.36 Edg/112.0.1722.68"
    ),
}
# 1. browser settings
options = webdriver.ChromeOptions()
# Run Chrome without a visible window.
options.add_argument('--headless')
options.add_argument('--disable-gpu')
# Chrome only understands command-line switches of the form ``--name=value``.
# An argument such as ``User-Agent=...`` is not a switch and is not applied,
# so the user agent has to be passed through ``--user-agent``.
user_agent = header.get("User-Agent")
if user_agent:
    options.add_argument(f'--user-agent={user_agent}')
# 2. browser driver
url = "https://httpbin.org/get?key1=value1"
# Pass the options object via ``options=``; the legacy ``chrome_options=``
# keyword is rejected by current Selenium 4 releases.
browser = webdriver.Chrome(options=options)
try:
    browser.get(url)
    print("start...")
    # Implicit wait: sets the timeout (seconds) that element lookups may poll
    # for before failing. It does not block here by itself.
    # NOTE: the Selenium documentation advises against combining implicit and
    # explicit waits because the resulting wait time can be unpredictable;
    # both are kept here because this script demonstrates each of them.
    browser.implicitly_wait(10)
    print("implicitly wait.")
    # Explicit wait: poll every 0.5 s, for up to 10 s, until at least one
    # element matching the XPath is present in the DOM.
    wait = WebDriverWait(browser, 10, poll_frequency=0.5)
    wait.until(EC.presence_of_all_elements_located((By.XPATH, '//meta[@name="color-scheme"]')))
    print("specifically wait.")
    # Snapshot the rendered DOM so it can be parsed after the browser closes.
    soup = BeautifulSoup(browser.page_source, 'html.parser')
finally:
    # Always shut the browser down, even when loading or waiting fails,
    # so no Chrome/chromedriver process is left behind.
    browser.quit()
# 3. get text
print("----body----")
# ``soup.prettify()`` only returns a formatted string and does not modify the
# soup, so the previously discarded call was dead code and has been dropped.
# Print the text content of the parsed page with all markup stripped.
print(soup.get_text())