Selenium库是一个自动化测试工具(模拟浏览器操作),支持多种浏览器。
在爬虫中主要用来解决JavaScript渲染的问题。
声明浏览器对象
from selenium import webdriver
# webdriver is essentially the browser-driver object.
# Declare a browser object (here: Chrome).
# The selenium.webdriver module provides all the WebDriver implementations. Currently supported WebDriver implementations are Firefox, Chrome, IE and Remote.
browser = webdriver.Chrome()
访问页面
# Per the Selenium docs: driver.get navigates to the page given by the URL, and
# WebDriver waits until the page has fully loaded (the "onload" event has
# fired) before returning control to the script.
browser.get("https://www.baidu.com")
# Fetch the page source first, then print it.
html = browser.page_source
print(html)
browser.close()
查找元素
单个元素
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get("https://www.taobao.com")
# Locate the element with id "q" in two ways: by id and by CSS selector.
# find_element(By.<strategy>, value) is used because the find_element_by_*
# helpers were removed in Selenium 4.3; this form also works on older versions.
input_first = browser.find_element(By.ID, "q")
input_second = browser.find_element(By.CSS_SELECTOR, "#q")
# close must actually be called; a bare `browser.close` only references the
# method and leaves the window open.
browser.close()
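# 查找方法(注意: 以下 find_element_by_* 方法在 Selenium 4.3 及之后的版本中已被移除, 新版本请改用 find_element(By.ID, "q") 这种写法, 需要 from selenium.webdriver.common.by import By):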
/*
find_element_by_id
find_element_by_name
find_element_by_xpath
find_element_by_link_text
find_element_by_tag_name
find_element_by_class_name
find_element_by_css_selector
*/
多个元素
# 用法与上述用法相同,把element改为elements即可(Selenium 4.3 及之后的版本请改用 find_elements(By.ID, "q") 这种写法)
/*
find_elements_by_id
find_elements_by_name
find_elements_by_xpath
find_elements_by_link_text
find_elements_by_tag_name
find_elements_by_class_name
find_elements_by_css_selector
*/
# 运行结果以list的形式返回
元素交互操作
# 对获取的元素调用交互方法
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
import time
browser = webdriver.Chrome()
browser.get("https://www.taobao.com")
# Named search_box rather than `input` so the builtin input() is not shadowed.
# find_element(By.<strategy>, value) replaces the find_element_by_* helpers,
# which were removed in Selenium 4.3.
search_box = browser.find_element(By.ID, "q")
search_box.send_keys("iPhone")
# Pause for one second before replacing the typed text.
time.sleep(1)
search_box.clear()
search_box.send_keys("iPad")
button = browser.find_element(By.CLASS_NAME, "btn-search")
button.click()
执行JavaScript
browser.get("https://www.zhihu.com/explore")
# Run JavaScript in the page: scroll to the bottom, then raise an alert.
browser.execute_script("window.scrollTo(0, document.body.scrollHeight)")
# The method is execute_script; the misspelled `execute_scritp` would raise
# AttributeError.
browser.execute_script("alert('To Bottom')")
获取元素信息
获取属性
# Read an attribute of an element with get_attribute().
# find_element(By.ID, ...) replaces find_element_by_id, which was removed in
# Selenium 4.3.
logo = browser.find_element(By.ID, "zh-top-link-logo")
print(logo.get_attribute('class'))
获取文本值
# Print the element's text via its .text attribute. The variable is not named
# `input`, which would shadow the builtin; find_element(By.CLASS_NAME, ...)
# replaces find_element_by_class_name, which was removed in Selenium 4.3.
question_element = browser.find_element(By.CLASS_NAME, "zu-top-add-question")
print(question_element.text)
浏览器选项卡管理
import time
from selenium import webdriver
browser = webdriver.Chrome()
browser.get("https://www.baidu.com")
# Open a second tab through JavaScript.
browser.execute_script('window.open()')
# switch_to.window replaces the deprecated switch_to_window, which is not
# available in Selenium 4. window_handles[1] is the newly opened tab.
browser.switch_to.window(browser.window_handles[1])
browser.get('https://www.taobao.com')
time.sleep(1)
# Switch back to the first tab and load another page there.
browser.switch_to.window(browser.window_handles[0])
browser.get('https://python.org')