python selenium webdriver 手册文档

最新推荐文章于 2024-06-26 16:02:37 发布

南通SEO

最新推荐文章于 2024-06-26 16:02:37 发布

阅读量2.5k

点赞数 2

分类专栏：文档手册　文章标签：文档手册

文档手册专栏收录该内容

27 篇文章 0 订阅

订阅专栏

python selenium webdriver 手册文档

1.安装与配置

pip install selenium

基本使用selenium都是为了动态加载网页内容用于爬虫，所以一般也会用到phantomjs

mac下如果要配置phantomjs环境的话

echo $PATH

ln -s <phantomjs可执行文件的完整路径> /usr/local/bin/phantomjs

至于chromeDriver，配置方法类似，下载地址：

https://npm.taobao.org/mirrors/chromedriver/

2.代码样例

复制代码

#!/usr/bin/env python
# coding=utf-8
"""Open Baidu in Chrome, type a keyword into the search box and run the search."""

import time

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

# A unicode literal works on both Python 2 and Python 3; calling .decode()
# on a plain string literal fails on Python 3.
keyword = u'家有'

chrome_options = webdriver.ChromeOptions()
# Optional tweaks -- uncomment as needed:
# chrome_options.binary_location = "C:\\Program Files (x86)\\Google\\Application\\chrome.exe"
# chrome_options.add_argument('--user-agent=iphone')
# chrome_options.add_argument('--proxy-server=http://61.155.164.110:3128')

# Other browsers can be driven the same way:
# driver = webdriver.Ie()
# driver = webdriver.Firefox()

# `options=` replaces the deprecated `chrome_options=` keyword, which newer
# Selenium releases no longer accept.
driver = webdriver.Chrome(options=chrome_options)

driver.get('http://www.baidu.com')

# 'kw' is the id of the search box; look it up once and reuse the element.
search_box = driver.find_element(By.ID, 'kw')
search_box.clear()
time.sleep(1)
search_box.send_keys(keyword)
time.sleep(3)

# 'su' is the id of the search button.
# Alternative: driver.find_element(By.ID, 'su').send_keys(Keys.ENTER)
driver.find_element(By.ID, 'su').click()

print(driver.title)

# driver.quit()

复制代码

3.api速查

3.1定位元素

3.1.1 通过id查找：

element = driver.find_element_by_id("coolestWidgetEvah")

or

from selenium.webdriver.common.by import By

element = driver.find_element(by=By.ID, value="coolestWidgetEvah")

3.1.2 通过class查找

cheeses = driver.find_elements_by_class_name("cheese")

or

from selenium.webdriver.common.by import By

cheeses = driver.find_elements(By.CLASS_NAME, "cheese")

3.1.3 通过标签名称查找

target_div = driver.find_element_by_tag_name("div")

or

from selenium.webdriver.common.by import By

target_div = driver.find_element(By.TAG_NAME, "div")

3.1.4 通过name属性查找

btn = driver.find_element_by_name("input_btn")

or

from selenium.webdriver.common.by import By

btn = driver.find_element(By.NAME, "input_btn")

3.1.5 通过链接的内容查找

next_page = driver.find_element_by_link_text("下一页")

or

from selenium.webdriver.common.by import By

next_page = driver.find_element(By.LINK_TEXT, "下一页")

3.1.6 通过链接的部分内容查找

# Pass only a fragment of the link text (e.g. the link reads "去下一页"),
# matching the By.PARTIAL_LINK_TEXT form shown below.
next_page = driver.find_element_by_partial_link_text("下一页")

or

from selenium.webdriver.common.by import By

next_page = driver.find_element(By.PARTIAL_LINK_TEXT, "下一页")

3.1.7 通过css查找

cheese = driver.find_element_by_css_selector("#food span.dairy.aged")

or

from selenium.webdriver.common.by import By

cheese = driver.find_element(By.CSS_SELECTOR, "#food span.dairy.aged")

3.1.8 通过xpath查找

inputs = driver.find_elements_by_xpath("//input")

or

from selenium.webdriver.common.by import By

inputs = driver.find_elements(By.XPATH, "//input")

3.1.9 通过js查找

from selenium.webdriver.common.by import By

# Collect every <label>, then let JavaScript resolve the element that each
# label's "for" attribute refers to.
labels = driver.find_elements(By.TAG_NAME, "label")
inputs = driver.execute_script(
    "var labels = arguments[0], inputs = []; for (var i=0; i < labels.length; i++){" +
    "inputs.push(document.getElementById(labels[i].getAttribute('for'))); } return inputs;",
    labels)

3.2 获取元素的文本信息

element = driver.find_element_by_id("element_id")

element.text

3.3 修改userAgent

# Override the user agent through a Firefox preference. The preference is set
# on FirefoxOptions because handing a FirefoxProfile to webdriver.Firefox()
# was deprecated in Selenium 4 and later removed.
options = webdriver.FirefoxOptions()
options.set_preference("general.useragent.override", "some UA string")
driver = webdriver.Firefox(options=options)

3.4 cookies

复制代码

# Go to the correct domain
driver.get("http://www.example.com")

# Now set the cookie. Here's one for the entire domain;
# the cookie name here is 'key' and its value is 'value'
driver.add_cookie({'name': 'key', 'value': 'value', 'path': '/'})

# additional keys that can be passed in are:
# 'domain' -> String,
# 'secure' -> Boolean,
# 'expiry' -> Seconds since the Unix Epoch at which it should expire.

# And now output all the available cookies for the current URL
for cookie in driver.get_cookies():
    print("%s -> %s" % (cookie['name'], cookie['value']))

# You can delete cookies in 2 ways
# By name
driver.delete_cookie("CookieName")
# Or all of them
driver.delete_all_cookies()

最后放一个自己的代码样例好了，完成的功能为找到搜索框输入搜索关键词然后点击搜索按钮，然后打开每个搜索结果并且输出网页源代码

# coding=utf-8
"""Run a search, click every result link and print each opened window's page source."""

import time

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait  # available since 2.4.0
from selenium.webdriver.support import expected_conditions as EC  # available since 2.26.0

# Create a new instance of the Chrome driver
driver = webdriver.Chrome()

# Everything after driver creation runs inside try/finally so the browser is
# closed even when navigation or an element lookup fails.
try:
    # go to the home page
    driver.get("http://www.baidu.com")

    # Remember the handle of the current (main) window
    nowhandle = driver.current_window_handle

    print(driver.title)

    # find the element whose name attribute is qymc (the search box)
    # NOTE(review): the locators "qymc", "imageField" and "pagetest2" were
    # presumably written for a different site than the URL above -- confirm
    # them against the page you actually target.
    inputElement = driver.find_element(By.NAME, "qymc")
    print(inputElement)

    # type in the search
    inputElement.send_keys(u"加油网")

    # The search is not a standard form and cannot be submitted with
    # inputElement.submit(), so click the search button instead.
    driver.find_element(By.NAME, "imageField").click()

    # Scroll to the bottom first: otherwise the last link can end up underneath
    # an unrelated element and the click overlaps the wrong target.
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    # find all links and click them
    for item in driver.find_elements(By.XPATH, '//*[@id="pagetest2"]/div/table/tbody/tr/td/a'):
        item.click()
        time.sleep(10)

    # Get the handles of all open windows
    allhandles = driver.window_handles

    # Look for the newly opened windows among all handles
    for handle in allhandles:
        if handle != nowhandle:
            # Work inside the new window; printing its source shows that we
            # really switched into it.
            driver.switch_to.window(handle)
            print(driver.page_source)

    # Return to the main window
    driver.switch_to.window(nowhandle)
finally:
    driver.quit()

关注

2
点赞
踩
9

收藏

觉得还不错? 一键收藏
0
评论
python selenium webdriver 手册文档

python selenium webdriver 手册文档 1.安装与配置 pip install selenium 基本使用selenium都是为了动态加载网页内容用于爬虫，所以一般也会用到phantomjs mac下如果要配置phantomjs环境的话 echo $PATH ln -s 至于chromeDriver，配置方法类似，下载地...
复制链接

扫一扫

专栏目录

评论

被折叠的条评论为什么被折叠?

到【灌水乐园】发言

查看更多评论

添加红包

成就一亿技术人!

hope_wisdom

发出的红包

实付元

使用余额支付

点击重新获取

扫码支付

钱包余额 0

抵扣说明：

1.余额是钱包充值的虚拟货币，按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载，可以购买VIP、付费专栏及课程。