selenium使用
导入
from selenium import webdriver
使用
# Location of the ChromeDriver executable; the raw string keeps the
# Windows backslashes literal.
drive_path = r"C:\Program Files\Python37\Chrome Drive\chromedriver.exe"
# Start a Chrome session, naming the driver-path argument explicitly.
driver = webdriver.Chrome(executable_path=drive_path)
打开网页
# Navigate the browser session held in `driver` to the Baidu home page.
driver.get("http://www.baidu.com")
获取元素
# Each line below demonstrates one locator strategy. Every call rebinds
# `inputTag`, so after this section only the last (XPath) result is kept.
# Locate by the element's id attribute.
inputTag=driver.find_element_by_id("kw")
# Locate by the element's name attribute.
inputTag=driver.find_element_by_name("wd")
# Locate by CSS class name.
inputTag=driver.find_element_by_class_name("s_ipt")
# Locate by CSS selector (here: the same class, written as a selector).
inputTag=driver.find_element_by_css_selector(".s_ipt")
# Locate a link by its full visible text.
inputTag=driver.find_element_by_link_text("新闻")
# Locate a link by a fragment of its visible text.
inputTag=driver.find_element_by_partial_link_text("闻")
# Locate by XPath expression (any element whose id is "kw").
inputTag=driver.find_element_by_xpath('//*[@id="kw"]')
对获取的元素传入值
# Type the text "python" into the element currently bound to `inputTag`.
inputTag.send_keys("python")
关闭页面
# Close the current page/window first ...
driver.close()
# ... then quit the driver, ending the whole browser session.
driver.quit()
清除元素
# Clear the content of the element bound to `inputTag`.
# NOTE(review): presumably meant for a text input such as the search box.
inputTag.clear()
完整的步骤
from selenium import webdriver
from selenium.webdriver.support.ui import Select
import time
# End-to-end example: open Baidu, type a query, dump the cookies, then save
# the page source, the current URL and a screenshot to the working directory.

# Location of the ChromeDriver executable; the raw string keeps the
# Windows backslashes literal.
driver_path = r"C:\Program Files\Python37\Chrome Drive\chromedriver.exe"
chrome_drive = webdriver.Chrome(executable_path=driver_path)
try:
    chrome_drive.get("http://www.baidu.com")
    chrome_drive.maximize_window()

    # Type the query into the element whose id is "kw".
    inputTag = chrome_drive.find_element_by_id("kw")
    inputTag.send_keys("python3")

    # The cookies are iterated as dicts with "name" and "value" keys;
    # flatten them into a single {name: value} mapping.
    cookies_list = chrome_drive.get_cookies()
    print("cookies_list: {}".format(cookies_list))
    cookies_dict = {cookie["name"]: cookie["value"] for cookie in cookies_list}
    print("cookies_dict: {}".format(cookies_dict))

    # NOTE(review): presumably gives the page time to react to the typed
    # text before it is captured.
    time.sleep(2)

    page_source = chrome_drive.page_source
    with open("./page_source01.html", "w", encoding="utf8") as f:
        f.write(page_source)

    page_current_url = chrome_drive.current_url
    with open("./page_current_url.html", "w", encoding="utf8") as f:
        f.write(page_current_url)

    chrome_drive.save_screenshot("baidu.png")
finally:
    # Always end the session so a failure above cannot leave Chrome running.
    chrome_drive.quit()
SSL验证警告和重定向
# Turn off warnings of the InsecureRequestWarning category.
# NOTE(review): neither `requests` nor `InsecureRequestWarning` is imported
# in this snippet; both must be imported before this line can run.
requests.packages.urllib3.disable_warnings(InsecureRequestWarning)
# NOTE(review): as written, the two lines below are plain variable
# assignments. They are presumably meant as keyword arguments to a requests
# call, e.g. requests.get(url, verify=False, allow_redirects=False) — confirm.
verify=False
allow_redirects=False
使用无头浏览器
from selenium import webdriver
from time import sleep
from selenium.webdriver.chrome.options import Options
# Configure Chrome to start with the --headless and --disable-gpu flags.
chrome_options = Options()
chrome_options.add_argument('--headless')
chrome_options.add_argument('--disable-gpu')
# Pass the options through `options=`; the older `chrome_options=` keyword
# is deprecated in Selenium.
driver = webdriver.Chrome(r'chromedriver.exe', options=chrome_options)
try:
    driver.get('https://www.cnblogs.com/')
    print(driver.page_source)
finally:
    # A headless browser has no window to close by hand, so always quit it
    # explicitly to avoid leaving the process running.
    driver.quit()
防止检测selenium
from selenium import webdriver
from selenium.webdriver import ChromeOptions
from time import sleep
# Per the section heading, the goal is to make the Selenium-driven browser
# harder for sites to detect: the "enable-automation" switch is listed under
# the "excludeSwitches" experimental option.
excluded_switches = ['enable-automation']
option = ChromeOptions()
option.add_experimental_option('excludeSwitches', excluded_switches)

# Start Chrome with those options and open the target site.
driver = webdriver.Chrome(r'chromedriver.exe', options=option)
taobao_url = 'https://www.taobao.com/'
driver.get(taobao_url)