python速查手册_selenium 速查手册 python版

1.安装与配置

pip install selenium

基本使用selenium都是为了动态加载网页内容用于爬虫,所以一般也会用到phantomjs

mac下如果要配置phantomjs环境的话

echo $PATH

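ln -s <phantomjs解压目录>/bin/phantomjs /usr/local/bin/phantomjs

(将 phantomjs 可执行文件软链接到 echo $PATH 输出中的某个目录下,例如 /usr/local/bin)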

至于chromeDriver,配置方法类似,下载地址:

https://sites.google.com/a/chromium.org/chromedriver/downloads

from selenium import webdriver

2.代码样例

from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait  # available since 2.4.0
from selenium.webdriver.support import expected_conditions as EC  # available since 2.26.0

# Create a new instance of the Firefox driver
driver = webdriver.Firefox()

# go to the google home page
driver.get("http://www.google.com")

# the page is ajaxy so the title is originally this:
print driver.title

# find the element that's name attribute is q (the google search box)
inputElement = driver.find_element_by_name("q")

# type in the search
inputElement.send_keys("cheese!")

# submit the form (although google automatically searches now without submitting)
inputElement.submit()

try:
    # we have to wait for the page to refresh, the last thing that seems to be updated is the title
    WebDriverWait(driver, 10).until(EC.title_contains("cheese!"))

    # You should see "cheese! - Google Search"
    print driver.title

finally:
    driver.quit()

3.api速查

3.1定位元素

3.1.1 通过id查找:

element = driver.find_element_by_id("coolestWidgetEvah")

or

from selenium.webdriver.common.by import By

element = driver.find_element(by=By.ID, value="coolestWidgetEvah")

3.1.2 通过class查找

cheeses = driver.find_elements_by_class_name("cheese")

or

from selenium.webdriver.common.by import By

cheeses = driver.find_elements(By.CLASS_NAME, "cheese")

3.1.3 通过标签名称查找

target_div = driver.find_element_by_tag_name("div")

or

from selenium.webdriver.common.by import By

target_div = driver.find_element(By.TAG_NAME, "div")

3.1.4 通过name属性查找

btn = driver.find_element_by_name("input_btn")

or

from selenium.webdriver.common.by import By

btn = driver.find_element(By.NAME, "input_btn")

3.1.5 通过链接的内容查找

next_page = driver.find_element_by_link_text("下一页")

or

from selenium.webdriver.common.by import By

next_page = driver.find_element(By.LINK_TEXT, "下一页")

3.1.6 通过链接的部分内容查找

next_page = driver.find_element_by_partial_link_text("去下一页")

or

from selenium.webdriver.common.by import By

next_page = driver.find_element(By.PARTIAL_LINK_TEXT, "下一页")

3.1.7 通过css查找

cheese = driver.find_element_by_css_selector("#food span.dairy.aged")

or

from selenium.webdriver.common.by import By

cheese = driver.find_element(By.CSS_SELECTOR, "#food span.dairy.aged")

3.1.8 通过xpath查找

inputs = driver.find_elements_by_xpath("//input")

or

from selenium.webdriver.common.by import By

inputs = driver.find_elements(By.XPATH, "//input")

3.1.9 通过js查找

labels = driver.find_elements_by_tag_name("label")

inputs=driver.execute_script("var labels = arguments[0], inputs = []; for (var i=0; i < labels.length; i++){" +

"inputs.push(document.getElementById(labels[i].getAttribute('for'))); } return inputs;", labels)

3.2 获取元素的文本信息

element = driver.find_element_by_id("element_id")

element.text

3.3 修改userAgent

profile =webdriver.FirefoxProfile()

profile.set_preference("general.useragent.override", "some UA string")

driver= webdriver.Firefox(profile)

3.4 cookies

#Go to the correct domain

driver.get("http://www.example.com")

# Now set the cookie. Here's one for the entire domain
# the cookie name here is 'key' and its value is 'value'
driver.add_cookie({'name':'key', 'value':'value', 'path':'/'})
# additional keys that can be passed in are:
# 'domain' -> String,
# 'secure' -> Boolean,
# 'expiry' -> Milliseconds since the Epoch it should expire.

# And now output all the available cookies for the current URL
for cookie in driver.get_cookies():
    print "%s -> %s" % (cookie['name'], cookie['value'])

# You can delete cookies in 2 ways
# By name
driver.delete_cookie("CookieName")

# Or all of them
driver.delete_all_cookies()

最后放一个自己的代码样例好了,完成的功能为找到搜索框输入搜索关键词然后点击搜索按钮,然后打开每个搜索结果并且输出网页源代码

#coding=utf-8

import time
from selenium import webdriver
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support.ui import WebDriverWait  # available since 2.4.0
from selenium.webdriver.support import expected_conditions as EC  # available since 2.26.0

# Create a new instance of the Chrome driver
driver = webdriver.Chrome()

# go to the home page
driver.get("http://www.zjcredit.gov.cn")

# 获得当前窗口句柄
nowhandle = driver.current_window_handle
print driver.title

# find the element that's name attribute is qymc (the search box)
inputElement = driver.find_element_by_name("qymc")
print inputElement

# type in the search
inputElement.send_keys(u"同花顺")

driver.find_element_by_name("imageField").click();
# submit the form (compare with google we can found that the search is not a standard form and can not be submitted, we do click instead)
# inputElement.submit()

try:
    # overlap will happen if we do not move the page to the bottom
    # the last link will be under another unrelevant link if we do not scroll to the bottom
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

    # find all link and click them
    for item in driver.find_elements_by_xpath('//*[@id="pagetest2"]/div/table/tbody/tr/td/a'):
        item.click()
        time.sleep(10)

    # 获取所有窗口句柄
    allhandles = driver.window_handles

    # 在所有窗口中查找新开的窗口
    for handle in allhandles:
        if handle != nowhandle:
            # 这两步是在弹出窗口中进行的操作,证明我们确实进入了
            driver.switch_to_window(handle)
            print driver.page_source

            # 返回到主窗口页面
            driver.switch_to_window(nowhandle)
finally:
    driver.quit()

添加一个阅读材料好了,写的挺好的

http://www.cnblogs.com/tobecrazy/p/4570494.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值