-----启动浏览器,打开网页-----
from selenium import webdriver
from selenium.webdriver.edge.service import Service

# Location of the msedgedriver executable.
path = r'D:\Courses\P9s\selenium\edgedriver_win64\msedgedriver.exe'
# Instantiate a browser driver object (Edge here; Chrome and the other
# browsers follow the same pattern). Note the capitalised class name.
# Selenium 4 expects the driver path to be supplied through a Service object;
# passing it as the first positional argument is deprecated and is rejected
# by later 4.x releases.
browser = webdriver.Edge(service=Service(executable_path=path))
# Open the given URL.
browser.get("https://www.baidu.com")
-----截图、关闭浏览器-----
from selenium import webdriver
from selenium.webdriver.edge.service import Service

# Location of the msedgedriver executable.
path = r'D:\Courses\P9s\selenium\edgedriver_win64\msedgedriver.exe'
# Selenium 4 takes the driver path via a Service object rather than as a
# positional argument.
browser = webdriver.Edge(service=Service(executable_path=path))
try:
    browser.get('https://www.baidu.com')
    # Save a screenshot of the loaded page; the relative file name is
    # resolved against the current working directory.
    browser.get_screenshot_as_file('截图.png')
finally:
    # quit() closes every window and ends the driver session, whereas
    # close() only closes the current window. Running it in `finally` makes
    # sure the browser is released even if loading or the screenshot fails.
    browser.quit()
-----打开百度,搜索框输入内容,点击搜索-----
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.edge.service import Service

# Location of the msedgedriver executable.
path = r'D:\Courses\P9s\selenium\edgedriver_win64\msedgedriver.exe'
# Selenium 4 takes the driver path via a Service object rather than as a
# positional argument.
browser = webdriver.Edge(service=Service(executable_path=path))
browser.get('https://www.baidu.com')
# Locate the search input by its element id.
search_box = browser.find_element(By.ID, 'kw')
# Type the query and press Enter in one call to submit the search.
# The browser is intentionally left open so the results stay visible.
search_box.send_keys('世界杯' + Keys.RETURN)
-----爬取技术博客-----
from selenium import webdriver
from selenium.webdriver.common.by import By

# Start Edge using the driver that Selenium resolves on its own (no explicit
# driver path is given here).
wd = webdriver.Edge()
try:
    wd.get('https://v3u.cn/l_id_1')
    # NOTE(review): the original selector ended in 'articles.blog-post'.
    # There is no <articles> HTML element and the per-post selector below
    # targets <article>, so this is treated as a typo - confirm against the
    # live page markup.
    articles = wd.find_elements(
        By.CSS_SELECTOR,
        'body>div>section>div>div.col-md-8>article.blog-post',
    )
    for article in articles:
        # find_elements() returns an empty list when nothing matches, while
        # find_element() raises NoSuchElementException, so the plural form is
        # the one that can actually be used as an existence check.
        title_links = article.find_elements(By.CSS_SELECTOR, 'article>div>h2>a')
        if not title_links:
            continue
        # Look the anchor up once and read both the title and the URL from it.
        title_link = title_links[0]
        print(title_link.text, '---', title_link.get_attribute('href'))
finally:
    # Always end the driver session, even if scraping fails part-way.
    wd.quit()
-----反爬--识别selenium+webdriver-----
// Anti-scraping check: branch on the navigator.webdriver flag of the current
// window. The flag is read directly in the condition instead of being
// assigned to an undeclared `webdriver` variable, which would create an
// implicit global and throw a ReferenceError in strict mode.
if (window.navigator.webdriver) {
    console.log('你这个xx,你以为使用Selenium模拟浏览器,我就不能发现你吗?');
    // Ban / blacklist handling would go here.
} else {
    console.log('正常浏览器');
}
-----使用版本的推荐方法 find_element()
使用的时候需要导入模块: from selenium.webdriver.common.by import By
属性定位方法
原定位方法find_element_by_*
推荐定位方法find_element()-----
xpath
find_element_by_xpath("//*[@id='search']")
find_element(By.XPATH, "//*[@id='search']")
class_name
find_element_by_class_name("element_class_name")
find_element(By.CLASS_NAME, "element_class_name")
id
find_element_by_id("element_id")
find_element(By.ID,"element_id")
name
find_element_by_name("element_name")
find_element(By.NAME, "element_name")
link_text
find_element_by_link_text("element_link_text")
find_element(By.LINK_TEXT,"element_link_text")
css_selector
find_element_by_css_selector("element_css_selector")
find_element(By.CSS_SELECTOR, "element_css_selector")
tag_name
find_element_by_tag_name("element_tag_name")
find_element(By.TAG_NAME, "element_tag_name")
partial_link_text
find_element_by_partial_link_text("element_partial_link_text")
find_element(By.PARTIAL_LINK_TEXT, "element_partial_link_text")