插件下载
驱动下载地址:http://chromedriver.storage.googleapis.com/index.html
selenium安装: pip install selenium
API
1.Sample
# Variant 1: start Chrome with a visible browser window
driver_path = "chromedriver.exe"
driver = webdriver.Chrome(driver_path)
driver.get("https://www.baidu.com")
# Variant 2: start Chrome without showing a window ("headless" argument)
# No driver path is passed in this variant, so chromedriver presumably has to
# be discoverable on PATH -- TODO confirm.
# NOTE(review): newer Selenium 4 releases expect the driver path through a
# Service object rather than positionally -- confirm against the installed version.
option = webdriver.ChromeOptions()
option.add_argument("headless")
driver = webdriver.Chrome(options=option)
driver.get("https://www.baidu.com")
2.关闭浏览器
# Quit the driver: closes every window and ends the WebDriver session
# (the two calls here are alternatives, not a sequence to run back to back)
driver.quit()
# Close only the current window
driver.close()
3.定位元素
find_element 是获取第一个满足条件的元素
find_elements 是获取所有满足条件的元素
# The find_element_by_* helpers were removed in Selenium 4.3;
# find_element(By.<strategy>, value) works on both Selenium 3 and 4.
from selenium.webdriver.common.by import By

# Locate by the id attribute
driver.find_element(By.ID, "kw")
# Locate by the name attribute
driver.find_element(By.NAME, "kw")
# Locate by CSS class name
driver.find_element(By.CLASS_NAME, "kw")
# Locate by tag name
driver.find_element(By.TAG_NAME, "div")
# Locate with an XPath expression
driver.find_element(By.XPATH, "//div")
# Locate with a CSS selector:
# the input element that is a direct child of .quickdelete-wrap
driver.find_element(By.CSS_SELECTOR, ".quickdelete-wrap > input")
4.操作表单元素
普通操作
# find_element(By.ID, ...) replaces find_element_by_id, which was removed
# in Selenium 4.3; this form works on Selenium 3 and 4 alike.
from selenium.webdriver.common.by import By

# Find the element by id
search_ele = driver.find_element(By.ID, "kw")
# Clear its current content
search_ele.clear()
# Type text into it
search_ele.send_keys("Python")
# Click the element whose id is "su"
driver.find_element(By.ID, "su").click()
Select
Select Demo
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>Select Demo</title>
</head>
<body>
<script>
    // Change handler of the drop-down: alerts the value of the chosen option.
    // The argument passed by the onchange attribute is not used; the
    // <select> is looked up by its id instead.
    function t(name) {
        var dropdown = document.getElementById("selectTest");
        // Option currently chosen, addressed through the selected index
        var chosen = dropdown.options[dropdown.selectedIndex];
        alert(chosen.value)
    }
</script>
<form>
    <select id="selectTest" onchange="t(this)">
        <option value="https://www.baidu.com">百度</option>
        <option value="https://www.google.com">谷歌</option>
        <option value="https://www.sina.com.cn/">新浪</option>
    </select>
</form>
</body>
</html>
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select

if __name__ == "__main__":
    driver_path = "chromedriver.exe"
    driver = webdriver.Chrome(driver_path)
    driver.get("http://localhost:5000/select")
    # Wrap the <select id="selectTest"> element in the Select helper.
    # find_element(By.ID, ...) is used because find_element_by_id was
    # removed in Selenium 4.3.
    select_ele = Select(driver.find_element(By.ID, "selectTest"))
    # Uncomment one of the calls below to try it.
    # Select by option index
    # select_ele.select_by_index(1)
    # Select by the option's visible text
    # select_ele.select_by_visible_text("谷歌")
    # Select by the option's value attribute
    # select_ele.select_by_value("https://www.google.com")
    # Deselect every option; Selenium only supports this on a
    # <select multiple> and raises NotImplementedError otherwise
    # select_ele.deselect_all()
5.行为链
from selenium import webdriver
from selenium.webdriver.common.action_chains import ActionChains
from selenium.webdriver.common.by import By

if __name__ == "__main__":
    driver_path = "chromedriver.exe"
    driver = webdriver.Chrome(driver_path)
    driver.get("https://www.baidu.com")
    # find_element(By.ID, ...) replaces find_element_by_id (removed in
    # Selenium 4.3) and works on Selenium 3 as well.
    input_tag = driver.find_element(By.ID, "kw")
    click_btn = driver.find_element(By.ID, "su")
    # Action chain: the calls below only queue actions; they are carried
    # out in order when perform() is called.
    actions = ActionChains(driver)
    actions.move_to_element(input_tag)
    actions.send_keys_to_element(input_tag, "Python")
    actions.move_to_element(click_btn)
    actions.click(click_btn)
    actions.perform()
click_and_hold 鼠标点击但不松开
context_click 右击
double_click 双击
更多方法参考 https://selenium-python.readthedocs.io/api.html
6.Cookie
from selenium import webdriver

if __name__ == "__main__":
    driver_path = "chromedriver.exe"
    driver = webdriver.Chrome(driver_path)
    try:
        driver.get("https://www.baidu.com")
        # Fetch and print every cookie of the current page
        all_cookies = driver.get_cookies()
        for cookie in all_cookies:
            print(cookie)
        # Fetch a single cookie by its name
        cookie = driver.get_cookie("BAIDUID")
        print(cookie)
        # Delete a single cookie by its name
        driver.delete_cookie("BAIDUID")
        # Delete all cookies
        driver.delete_all_cookies()
    finally:
        # The script only prints its results, so release the browser and
        # the driver process instead of leaving them running.
        driver.quit()
7.等待
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

if __name__ == "__main__":
    driver_path = "chromedriver.exe"
    driver = webdriver.Chrome(driver_path)
    try:
        # Implicit wait: element lookups keep retrying for up to 10 s
        # before giving up.
        # NOTE: both wait styles appear here for demonstration only; the
        # Selenium documentation advises against mixing implicit and
        # explicit waits because the resulting wait time is unpredictable.
        driver.implicitly_wait(10)
        driver.get("https://www.baidu.com")
        # Explicit wait: block for up to 10 s until the condition holds;
        # until() returns the located element.
        element = WebDriverWait(driver, 10).until(
            EC.presence_of_element_located((By.ID, "kw"))
        )
    finally:
        # quit() rather than close(): ends the whole session, not just
        # the current window.
        driver.quit()
presence_of_element_located 某个元素已存在于 DOM 中(不一定可见)
presence_of_all_elements_located 页面中至少存在一个满足条件的元素,返回所有匹配元素的列表
element_to_be_clickable 某个元素可见且可用,可以点击了
更多方法参考 https://selenium-python.readthedocs.io/waits.html
8.切换页面
from selenium import webdriver
from selenium.webdriver.common.by import By

if __name__ == "__main__":
    driver_path = "chromedriver.exe"
    driver = webdriver.Chrome(driver_path)
    url = "https://www.baidu.com"
    driver.get(url)
    # Open the same URL in a new window; the URL is handed over as a
    # script argument instead of being spliced into the JavaScript source,
    # so quotes in the URL cannot break the script.
    driver.execute_script("window.open(arguments[0])", url)
    # Switch to the new window explicitly before using it.
    # switch_to.window() replaces switch_to_window(), which was deprecated
    # in Selenium 3 and removed in Selenium 4.
    driver.switch_to.window(driver.window_handles[1])
    # Type "Python" into the element with id "kw"
    driver.find_element(By.ID, "kw").send_keys("Python")
    # Click the element with id "su"
    driver.find_element(By.ID, "su").click()
9.设置代理ip
from selenium import webdriver

if __name__ == "__main__":
    driver_path = "chromedriver.exe"
    options = webdriver.ChromeOptions()
    # Start Chrome with the given HTTP proxy server
    options.add_argument("--proxy-server=http://124.235.181.175:80")
    driver = webdriver.Chrome(driver_path, options=options)
    # Presumably this page echoes the caller's IP, which makes the effect
    # of the proxy visible -- verify.
    url = "http://httpbin.org/ip"
    driver.get(url)