python爬虫——selenium库

1.简介。
自动化测试工具,支持多种浏览器,爬虫中主要用来解决JavaScript渲染问题。(ps:要调用浏览器,必须先下载与浏览器版本对应的driver可执行文件(如chromedriver.exe),并把它放进python的安装目录或其他已加入PATH环境变量的目录!)
2.代码。
基本使用

#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Basic usage: open Baidu, type "Python" into the element with id "kw",
# submit with ENTER, wait for the element with id "content_left" to appear,
# then print the current URL, the cookies and the page source.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

browser = webdriver.Chrome()
try:
    browser.get("http://www.baidu.com")
    # find_element(By.ID, ...) works on Selenium 3 and 4; the
    # find_element_by_* helpers were removed in Selenium 4.3.
    # Named search_box rather than `input` to avoid shadowing the builtin.
    search_box = browser.find_element(By.ID, 'kw')
    search_box.send_keys('Python')
    search_box.send_keys(Keys.ENTER)
    # Explicit wait: poll for up to 10 seconds until the element is present.
    wait = WebDriverWait(browser, 10)
    wait.until(EC.presence_of_all_elements_located((By.ID, 'content_left')))
    print(browser.current_url)
    print(browser.get_cookies())
    print(browser.page_source)
finally:
    # quit() ends the whole WebDriver session (all windows and the driver
    # process); close() would only close the current window.
    browser.quit()

声明浏览器对象和访问页面

#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Create a browser object, visit a page and print its HTML source.
from selenium import webdriver

browser = webdriver.Chrome()  # create the Chrome browser object
try:
    browser.get('https://www.taobao.com')
    print(browser.page_source)  # print the page source
finally:
    # Always end the session, even if loading the page raised;
    # quit() also shuts down the driver process, close() does not.
    browser.quit()

查找元素 —— 单个元素

#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Find a single element: locate the element with id "q" using three
# different locator strategies (id, CSS selector, XPath).
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()  # create the browser object
try:
    browser.get('https://www.taobao.com')
    # find_element(By.<strategy>, value) works on Selenium 3 and 4; the
    # find_element_by_* helpers were removed in Selenium 4.3.
    input_first = browser.find_element(By.ID, 'q')
    input_second = browser.find_element(By.CSS_SELECTOR, '#q')
    input_third = browser.find_element(By.XPATH, '//*[@id="q"]')
    print(input_first, input_second, input_third)
finally:
    # End the session even if a lookup fails (NoSuchElementException).
    browser.quit()

运行结果:

D:\Anaconda3\python.exe C:/Users/lenovo/PycharmProjects/爬虫/s2.py
<selenium.webdriver.remote.webelement.WebElement (session="bd728922e48649f1839dc4b9d5cf6436", element="0.7649982219346745-1")> <selenium.webdriver.remote.webelement.WebElement (session="bd728922e48649f1839dc4b9d5cf6436", element="0.7649982219346745-1")> <selenium.webdriver.remote.webelement.WebElement (session="bd728922e48649f1839dc4b9d5cf6436", element="0.7649982219346745-1")>

Process finished with exit code 0

查找元素 —— 多个元素

#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Find multiple elements: collect every element matching a CSS selector.
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
try:
    browser.get('https://www.taobao.com')
    # find_elements (plural) returns a list of WebElement objects; it is an
    # empty list when nothing matches. The find_elements_by_* helpers were
    # removed in Selenium 4.3, so use the By-based form.
    lis = browser.find_elements(By.CSS_SELECTOR, '.service-bd li')
    print(lis)
finally:
    # End the session and the driver process even if the page failed to load.
    browser.quit()

运行结果:

D:\Anaconda3\python.exe C:/Users/lenovo/PycharmProjects/爬虫/s3.py
[<selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-1")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-2")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-3")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-4")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-5")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-6")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-7")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-8")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-9")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-10")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-11")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-12")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-13")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-14")>, <selenium.webdriver.remote.webelement.WebElement (session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-15")>, <selenium.webdriver.remote.webelement.WebElement 
(session="99ad19904aae88b84fec67a760d811e6", element="0.36740963219874057-16")>]

Process finished with exit code 0

元素交互操作——对获取的元素调用交互方法

#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Element interaction: type into a text field, clear it, type again and
# click a button.
import time

from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
browser.get('https://www.taobao.com')
# find_element(By.ID, ...) replaces find_element_by_id (removed in
# Selenium 4.3). Named search_box rather than `input` to avoid shadowing
# the builtin.
search_box = browser.find_element(By.ID, 'q')
search_box.send_keys('iPhone')
time.sleep(1)  # pause so the first keyword is visible before it is cleared
search_box.clear()
search_box.send_keys('iPad')
button = browser.find_element(By.CLASS_NAME, 'btn-search')
button.click()
# NOTE: the browser is left open, as in the original demo, so the result
# page can be inspected; call browser.quit() when you are done with it.

交互动作——将动作附加到动作链中串行执行

#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Action chains: queue up a drag-and-drop and execute it with perform().
from selenium.webdriver import ActionChains
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
url = "http://www.runoob.com/try/try.php?filename=jqueryui-api-droppable"
browser.get(url)
# Switch into the frame identified by 'iframeResult'; the two elements
# below are looked up inside that frame.
browser.switch_to.frame('iframeResult')
# find_element(By.CSS_SELECTOR, ...) replaces find_element_by_css_selector,
# which was removed in Selenium 4.3.
source = browser.find_element(By.CSS_SELECTOR, '#draggable')  # element to drag
target = browser.find_element(By.CSS_SELECTOR, '#droppable')  # drop target
actions = ActionChains(browser)  # build an action chain bound to this browser
actions.drag_and_drop(source, target)  # queue: drag source onto target
actions.perform()  # run the queued actions
# NOTE: the browser is left open, as in the original demo, so the result
# can be inspected; call browser.quit() when you are done with it.

执行JavaScript

#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Execute JavaScript in the page: scroll to the bottom, then show an alert.
from selenium import webdriver

EXPLORE_URL = 'http://www.zhihu.com/explore'
SCROLL_TO_BOTTOM_JS = 'window.scrollTo(0,document.body.scrollHeight)'
SHOW_ALERT_JS = 'alert("To Bottom")'

browser = webdriver.Chrome()
browser.get(EXPLORE_URL)
# Run the two scripts in order in the context of the loaded page.
for script in (SCROLL_TO_BOTTOM_JS, SHOW_ALERT_JS):
    browser.execute_script(script)

获取元素信息——获取属性

#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Element info: read an HTML attribute from a located element.
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
try:
    url = 'http://www.zhihu.com/explore'
    browser.get(url)
    # find_element(By.ID, ...) replaces find_element_by_id, which was
    # removed in Selenium 4.3.
    logo = browser.find_element(By.ID, 'zh-top-link-logo')
    print(logo)
    print(logo.get_attribute('class'))  # value of the element's class attribute
finally:
    # Nothing visual to inspect here, so end the session (and the driver
    # process) once the values are printed or if the lookup fails.
    browser.quit()

结果:

D:\Anaconda3\python.exe C:/Users/lenovo/PycharmProjects/爬虫/s7.py
<selenium.webdriver.remote.webelement.WebElement (session="b1bebb30bb020339bf2d7693620c5002", element="0.047356492735414424-1")>
zu-top-link-logo

Process finished with exit code 0

获取元素信息——获取文本值

#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Element info: read the visible text of a located element.
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
try:
    url = 'http://www.zhihu.com/explore'
    browser.get(url)
    # find_element(By.CLASS_NAME, ...) replaces find_element_by_class_name
    # (removed in Selenium 4.3). Named ask_button rather than `input` to
    # avoid shadowing the builtin.
    ask_button = browser.find_element(By.CLASS_NAME, 'zu-top-add-question')
    print(ask_button.text)
finally:
    # End the session and the driver process, even if the lookup fails.
    browser.quit()

结果:

D:\Anaconda3\python.exe C:/Users/lenovo/PycharmProjects/爬虫/s8.py
提问

Process finished with exit code 0

获取元素信息——获取ID、位置、标签名、大小

#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Element info: print an element's id, location, tag name and size.
from selenium import webdriver
from selenium.webdriver.common.by import By

browser = webdriver.Chrome()
try:
    url = 'http://www.zhihu.com/explore'
    browser.get(url)
    # find_element(By.CLASS_NAME, ...) replaces find_element_by_class_name
    # (removed in Selenium 4.3). Named ask_button rather than `input` to
    # avoid shadowing the builtin.
    ask_button = browser.find_element(By.CLASS_NAME, 'zu-top-add-question')
    print(ask_button.id)        # WebDriver's internal element id, not the HTML id attribute
    print(ask_button.location)  # dict with 'x' and 'y'
    print(ask_button.tag_name)
    print(ask_button.size)      # dict with 'height' and 'width'
finally:
    # End the session and the driver process, even if the lookup fails.
    browser.quit()

结果:

D:\Anaconda3\python.exe C:/Users/lenovo/PycharmProjects/爬虫/s9.py
0.981153209857224-1
{'x': 758, 'y': 7}
button
{'height': 32, 'width': 66}

Process finished with exit code 0

前进后退

#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Browser history: visit three pages, go back one step, then forward again.
from selenium import webdriver
import time

browser = webdriver.Chrome()
try:
    browser.get('http://www.taobao.com')
    browser.get('http://www.baidu.com')
    browser.get('http://www.zhihu.com')
    browser.back()     # go back one page in history
    time.sleep(1)      # sleep for one second
    browser.forward()  # go forward one page in history
finally:
    # Always end the session, even if a navigation raised; quit() also
    # shuts down the driver process, which close() does not.
    browser.quit()

Cookies

#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Cookies: list the cookies, add one, list again, delete all, list again.
from selenium import webdriver

browser = webdriver.Chrome()
try:
    browser.get('https://www.zhihu.com/explore')
    print(browser.get_cookies())   # cookies set by the site
    # Add a cookie of our own.
    browser.add_cookie({'name':'dwj','domain':'www.zhihu.com','value':'germey'})
    print(browser.get_cookies())   # now also contains the 'dwj' cookie
    browser.delete_all_cookies()   # delete every cookie
    print(browser.get_cookies())   # prints an empty list
finally:
    # End the session and the driver process, even if a call above fails.
    browser.quit()

结果:

D:\Anaconda3\python.exe C:/Users/lenovo/PycharmProjects/爬虫/s11.py
[{'domain': '.zhihu.com', 'httpOnly': False, 'name': 'l_n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': 'www.zhihu.com', 'expiry': 1543737715.05855, 'httpOnly': False, 'name': 'tgw_l7_route', 'path': '/', 'secure': False, 'value': '53d8274aa4a304c1aeff9b999b2aaa0a'}, {'domain': '.zhihu.com', 'expiry': 1543738618, 'httpOnly': False, 'name': '__utmb', 'path': '/', 'secure': False, 'value': '51854390.0.10.1543736818'}, {'domain': '.zhihu.com', 'expiry': 1638344815.058755, 'httpOnly': False, 'name': 'q_c1', 'path': '/', 'secure': False, 'value': 'f25fb2b060c848b6b6ce5ab4b95b6369|1543736810000|1543736810000'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': '7639300af6920ffa63978e25321aa41b'}, {'domain': '.zhihu.com', 'expiry': 1546328815.060298, 'httpOnly': False, 'name': 'r_cap_id', 'path': '/', 'secure': False, 'value': '"OTc4ODJmYmZhZTVhNDdlMDgwMzkxODQ2ZTBkNGJjMTE=|1543736810|303ecf1cc48a8709da272dcebe8884ecb8d48a88"'}, {'domain': '.zhihu.com', 'expiry': 1546328815.060384, 'httpOnly': False, 'name': 'cap_id', 'path': '/', 'secure': False, 'value': '"MDRiYWE4NTA0ZjVlNDA1OGFhZmQxYTNhYjY2YTVkODA=|1543736810|0f3c805aeecd7ba88f3094e7a1b4a11e944c4101"'}, {'domain': '.zhihu.com', 'expiry': 1546328815.060585, 'httpOnly': False, 'name': 'l_cap_id', 'path': '/', 'secure': False, 'value': '"MTRkZGIzYWI3MDQzNDE4NGFiOThkZTExM2NhNDllNGM=|1543736810|e0857722320b2e1f3329cdb5b298fd4d95de6e85"'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': 'n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': '.zhihu.com', 'expiry': 1638344817.948379, 'httpOnly': False, 'name': 'd_c0', 'path': '/', 'secure': False, 'value': '"AFDiVUWMmw6PTmwXpV-HqPzoef5yS4TXstc=|1543736813"'}, {'domain': '.zhihu.com', 'expiry': 1621496818.358731, 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': 'BkkYmIaUuoFBlmisga7IPW2j6k5DllLB'}, {'domain': '.zhihu.com', 'expiry': 1606808818, 'httpOnly': False, 'name': 
'__utma', 'path': '/', 'secure': False, 'value': '51854390.1267940103.1543736818.1543736818.1543736818.1'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': '__utmc', 'path': '/', 'secure': False, 'value': '51854390'}, {'domain': '.zhihu.com', 'expiry': 1559504818, 'httpOnly': False, 'name': '__utmz', 'path': '/', 'secure': False, 'value': '51854390.1543736818.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'}, {'domain': '.zhihu.com', 'expiry': 1606808818, 'httpOnly': False, 'name': '__utmv', 'path': '/', 'secure': False, 'value': '51854390.000--|3=entry_date=20181202=1'}, {'domain': '.zhihu.com', 'expiry': 1606808818, 'httpOnly': False, 'name': '_zap', 'path': '/', 'secure': False, 'value': 'c3079d4f-f991-4026-b3a4-f10aa6d821ef'}]
[{'domain': '.zhihu.com', 'httpOnly': False, 'name': 'l_n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': 'www.zhihu.com', 'expiry': 1543737715.05855, 'httpOnly': False, 'name': 'tgw_l7_route', 'path': '/', 'secure': False, 'value': '53d8274aa4a304c1aeff9b999b2aaa0a'}, {'domain': '.zhihu.com', 'expiry': 1543738618, 'httpOnly': False, 'name': '__utmb', 'path': '/', 'secure': False, 'value': '51854390.0.10.1543736818'}, {'domain': '.zhihu.com', 'expiry': 1638344815.058755, 'httpOnly': False, 'name': 'q_c1', 'path': '/', 'secure': False, 'value': 'f25fb2b060c848b6b6ce5ab4b95b6369|1543736810000|1543736810000'}, {'domain': 'www.zhihu.com', 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': '7639300af6920ffa63978e25321aa41b'}, {'domain': '.zhihu.com', 'expiry': 1546328815.060298, 'httpOnly': False, 'name': 'r_cap_id', 'path': '/', 'secure': False, 'value': '"OTc4ODJmYmZhZTVhNDdlMDgwMzkxODQ2ZTBkNGJjMTE=|1543736810|303ecf1cc48a8709da272dcebe8884ecb8d48a88"'}, {'domain': '.zhihu.com', 'expiry': 1546328815.060384, 'httpOnly': False, 'name': 'cap_id', 'path': '/', 'secure': False, 'value': '"MDRiYWE4NTA0ZjVlNDA1OGFhZmQxYTNhYjY2YTVkODA=|1543736810|0f3c805aeecd7ba88f3094e7a1b4a11e944c4101"'}, {'domain': '.zhihu.com', 'expiry': 1546328815.060585, 'httpOnly': False, 'name': 'l_cap_id', 'path': '/', 'secure': False, 'value': '"MTRkZGIzYWI3MDQzNDE4NGFiOThkZTExM2NhNDllNGM=|1543736810|e0857722320b2e1f3329cdb5b298fd4d95de6e85"'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': 'n_c', 'path': '/', 'secure': False, 'value': '1'}, {'domain': '.zhihu.com', 'expiry': 1638344817.948379, 'httpOnly': False, 'name': 'd_c0', 'path': '/', 'secure': False, 'value': '"AFDiVUWMmw6PTmwXpV-HqPzoef5yS4TXstc=|1543736813"'}, {'domain': '.zhihu.com', 'expiry': 1621496818.358731, 'httpOnly': False, 'name': '_xsrf', 'path': '/', 'secure': False, 'value': 'BkkYmIaUuoFBlmisga7IPW2j6k5DllLB'}, {'domain': '.zhihu.com', 'expiry': 1606808818, 'httpOnly': False, 'name': 
'__utma', 'path': '/', 'secure': False, 'value': '51854390.1267940103.1543736818.1543736818.1543736818.1'}, {'domain': '.zhihu.com', 'httpOnly': False, 'name': '__utmc', 'path': '/', 'secure': False, 'value': '51854390'}, {'domain': '.zhihu.com', 'expiry': 1559504818, 'httpOnly': False, 'name': '__utmz', 'path': '/', 'secure': False, 'value': '51854390.1543736818.1.1.utmcsr=(direct)|utmccn=(direct)|utmcmd=(none)'}, {'domain': '.zhihu.com', 'expiry': 1606808818, 'httpOnly': False, 'name': '__utmv', 'path': '/', 'secure': False, 'value': '51854390.000--|3=entry_date=20181202=1'}, {'domain': '.zhihu.com', 'expiry': 1606808818, 'httpOnly': False, 'name': '_zap', 'path': '/', 'secure': False, 'value': 'c3079d4f-f991-4026-b3a4-f10aa6d821ef'}, {'domain': 'www.zhihu.com', 'expiry': 2174456818, 'httpOnly': False, 'name': 'dwj', 'path': '/', 'secure': True, 'value': 'germey'}]
[]

Process finished with exit code 0

选项卡

#!/usr/bin/python
# -*- coding: UTF-8 -*-
# Tabs: open a second tab via JavaScript and load a different site in each.
import time

from selenium import webdriver

FIRST_TAB, SECOND_TAB = 0, 1  # indexes into browser.window_handles

browser = webdriver.Chrome()
browser.get('https://www.baidu.com')
browser.execute_script('window.open()')  # open a new tab
print(browser.window_handles)  # print the handles of all open tabs

# Switch to the second tab and load Taobao there.
second_handle = browser.window_handles[SECOND_TAB]
browser.switch_to.window(second_handle)
browser.get('https://www.taobao.com')
time.sleep(1)

# Switch back to the first tab and load Zhihu there.
first_handle = browser.window_handles[FIRST_TAB]
browser.switch_to.window(first_handle)
browser.get('https://www.zhihu.com')
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

晶晶娃在战斗

你的鼓励将是我创作的最大动力!

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值