python selenium实例_python爬虫之selenium使用案例教程

最新推荐文章于 2021-06-08 20:23:56 发布

天眞無鞋

最新推荐文章于 2021-06-08 20:23:56 发布

阅读量421

点赞数

文章标签： python selenium实例

本文链接：https://blog.csdn.net/weixin_34274601/article/details/113493649

版权

本文介绍了如何使用Python的Selenium库进行网页自动化测试和爬虫。通过实例展示了如何加载页面、设置无头浏览器、处理超时、获取页面源码、操作DOM元素以及执行JavaScript等操作。

摘要由CSDN通过智能技术生成

#　selenium:是一个web的自动化测试工具,可以直接运行在浏览器上,

# 但是并不自带浏览器,需要有浏览器驱动,selenium可以根据我们的代码指令

# 让浏览器自动加载页面,这时得到的页面源码是经过浏览器渲染之后的,

# 然后我们就可以在页面源码中寻找节点(动态加载的网页,模拟登录)

#pip3 install selenium

from selenium import webdriver

import time

#加载页面

# driver = webdriver.Firefox(

# executable_path='/home/ljh/桌面/driver/geckodriver'

# )

# #使用get方法打开页面

# driver.get('https://www.baidu.com/')

#加载页面(PhantomJS,无头浏览器)

#warnings.warn('Selenium support for PhantomJS

# has been deprecated, please use headless '

#目前推荐使用谷歌的无头浏览器

# driver = webdriver.PhantomJS(

# executable_path='/home/ljh/桌面/driver/phantomjs'

# )

# driver.get('https://www.baidu.com/')

# driver.save_screenshot('baidu.png')

# 加载页面(使用谷歌的浏览器驱动)

#设置为无头浏览器

# opt = webdriver.ChromeOptions()

# opt.set_headless()

# driver = webdriver.Chrome(

# options=opt,

# executable_path='/home/ljh/桌面/driver/chromedriver'

# )

# Start a regular (non-headless) Chrome session driven by the local
# chromedriver binary at the given path.
driver = webdriver.Chrome(executable_path='/home/ljh/桌面/driver/chromedriver')

# Limit how long a page load may take (10 seconds).
driver.set_page_load_timeout(10)

# Selenium's exception classes, used to handle load and lookup failures.
from selenium.common import exceptions

# Open the page; if the load exceeds the timeout set above, report it and
# carry on with whatever the browser has at that point.
try:
    driver.get('https://www.baidu.com/')
except exceptions.TimeoutException as err:
    print(err,'请求超时')

# Information available from the driver.
# Page source as rendered by the browser.
page_html = driver.page_source

# Write with an explicit UTF-8 encoding: the page contains non-ASCII text and
# the platform default encoding may not be able to represent it.
with open('baidu.html', 'w', encoding='utf-8') as file:
    file.write(page_html)

#获取cookies信息

"""

[

{'domain':

'.baidu.com',

'httpOnly': False,

'path': '/',

'secure': False,

'value': '1431_21080_28206_28131_27750_28139_27509',

'name': 'H_PS_PSSID'},

{'domain': '.baidu.com', 'httpOnly': False, 'path': '/', 'expiry': 3693275324.184597, 'secure': False, 'value': '8C1C72599F01E693A201BA4B33C6DFE0', 'name': 'BIDUPSID'}, {'domain': '.baidu.com', 'httpOnly': False, 'path': '/', 'secure': False, 'value': '0', 'name': 'delPer'}, {'domain': '.baidu.com', 'httpOnly': False, 'path': '/', 'expiry': 3693275324.184649, 'secure': False, 'value': '1545791676', 'name': 'PSTM'}, {'domain': 'www.baidu.com', 'httpOnly': False, 'path': '/', 'expiry': 1546655678, 'secure': False, 'value': '123353', 'name': 'BD_UPN'}, {'domain': 'www.baidu.com', 'httpOnly': False, 'path': '/', 'secure': False, 'value': '0', 'name': 'BD_HOME'}, {'domain': '.baidu.com', 'httpOnly': False, 'path': '/', 'expiry': 3693275324.18448, 'secure': False, 'value': '8C1C72599F01E693A201BA4B33C6DFE0:FG=1', 'name': 'BAIDUID'}]

"""

# Fetch every cookie of the current session (a list of dicts).
cookies = driver.get_cookies()

# Fetch a single cookie by name (the result is not used here).
driver.get_cookie('BD_UPN')

# Collapse the cookie list into a plain {name: value} mapping.
cookies_dict = dict((entry['name'], entry['value']) for entry in cookies)

print(cookies)
print(cookies_dict)

#删除cookie

# driver.delete_cookie('BD_UPN')

# #删除所有的cookies

# driver.delete_all_cookies()

# #添加cookies

# #cookie_dict(字典,存放的cookies信息)

# driver.add_cookie()

# URL of the page currently loaded in the browser.
cur_url = driver.current_url
print(cur_url)

# Name of the browser the driver is controlling.
name = driver.name
print(name)

#定位和操作节点(标签)

"""

driver.find_element_by_xpath():根据xpath路径定位标签(找单个)

driver.find_elements_by_xpath()根据xpath路径定位标签(找所有)

driver.find_element_by_css_selector():根据css选择器定位标签

driver.find_element_by_link_text():根据标签文本内容(完整)定位

driver.find_element_by_partial_link_text():根据标签文本内容(局部)定位

driver.find_element_by_id():根据id属性寻找节点

driver.find_element_by_class_name():根据class属性寻找节点

"""

# Locate the search input and type a query into it.
search_box = driver.find_element_by_id('kw')
search_box.send_keys('隔壁老王')

# Look the input up again and clear it.
search_box = driver.find_element_by_id('kw')
search_box.clear()

time.sleep(2)

# Type a different query.
search_box = driver.find_element_by_id('kw')
search_box.send_keys('隔壁老赵')

# Locate the submit button and simulate a click.
submit_button = driver.find_element_by_id('su')
submit_button.click()

# Save a screenshot of the browser window.
driver.save_screenshot('baiduyixia.png')

# #前进后退

# time.sleep(2)

# #后退

# driver.back()

# time.sleep(2)

# #前进

# driver.forward()

# Page waits.
# Like a real browser, Selenium needs time to load a page, especially a
# dynamic one; looking up a node before the page has finished loading raises
# an exception, so the script has to wait at this point.

# Fixed pause of 3 seconds.
time.sleep(3)

# Implicit wait: a time budget (10 seconds here); when a node being looked up
# is not present yet, the lookup keeps waiting for a while before giving up.
# NOTE(review): an explicit WebDriverWait is used further down as well;
# confirm that combining both wait styles is intended.
driver.implicitly_wait(10)

# Explicit wait: give a maximum wait time and block until a condition holds;
# if the condition is not met within that time (node not found), an
# exception is raised.

# By: selects the strategy used to locate a node.
from selenium.webdriver.common.by import By
# WebDriverWait: carries the driver and the maximum wait time.
from selenium.webdriver.support.ui import WebDriverWait
# expected_conditions: ready-made conditions to wait for.
from selenium.webdriver.support import expected_conditions

# Wait up to 10 seconds for an element with class "n" to be present.
locator = (By.CLASS_NAME, 'n')
waiter = WebDriverWait(driver, 10)
a_element = waiter.until(
    expected_conditions.presence_of_element_located(locator)
)

print(a_element.text)

# Read an attribute and the text of the first matching result link.
# A failed lookup raises NoSuchElementException
# ("no such element: Unable to locate element").
try:
    # Locate the link once and read both values from the same element.
    result_link = driver.find_element_by_xpath('//h3[@class="t"]/a')
    # .get_attribute('href') returns the value of the tag's href attribute.
    href = result_link.get_attribute('href')
    # .text returns the tag's text.
    title = result_link.text
    print(href,title)
except exceptions.NoSuchElementException:
    print('没有找到节点')

# Hide every image on the page; execute_script runs a JavaScript statement in
# the browser, with each element passed in as arguments[0].
# NOTE(review): the script calls `$(...).fadeOut()`, so it presumably relies
# on the loaded page providing jQuery — verify for other target pages.
imgs = driver.find_elements_by_xpath('//img')
for img in imgs:
    driver.execute_script('$(arguments[0]).fadeOut()', img)

# Scroll down to the bottom of the page.
driver.execute_script('window.scrollTo(0,document.body.scrollHeight)' )

#关闭操作

#关闭当前所在的窗口

# driver.close()

# #退出浏览器

# driver.quit()

天眞無鞋

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
python selenium实例_python爬虫之selenium使用案例教程

#　selenium:是一个web的自动化测试工具,可以直接运行在浏览器上,# 但是并不自带浏览器,需要有浏览器驱动,selenium可以根据我们的代码指令# 让浏览器自动加载页面,这时得到的页面源码是经过浏览器渲染之后的,# 然后我们就可以在页面源码中寻找节点(动态加载的网页,模拟登录)#pip3 install seleniumfrom selenium import webd...
复制链接

扫一扫