自动化模块selenium使用归纳

最新推荐文章于 2021-06-02 23:20:14 发布

weixin_34308389

最新推荐文章于 2021-06-02 23:20:14 发布

阅读量267

点赞数

文章标签： python javascript 操作系统 ViewUI

原文链接：https://my.oschina.net/qingqingdego/blog/3019734

版权

2019独角兽企业重金招聘Python工程师标准>>>

安装

pip3 install selenium

测试是否成功

from selenium import webdriver

# Start a Chrome session.
browser=webdriver.Chrome()

# If a blank Chrome browser window pops up, the setup works.

浏览器对象获取

from selenium import webdriver

# One driver class per browser; the alternatives are left commented out.
#browser=webdriver.Firefox()
browser=webdriver.Chrome()
#browser=webdriver.Edge()
#browser=webdriver.Safari()

print(type(browser))

# The object returned is a WebDriver instance (its type is printed below).
<class 'selenium.webdriver.chrome.webdriver.WebDriver'>

WebDriver对象的方法和属性：

add_cookie(cookie_dict)：　　为当前会话添加一个cookie，为字典类型
back()：在浏览器历史记录中后退一步
forward()：在浏览器历史上前进一步
close()：关闭当前窗口
create_web_element(element_id)：使用指定的id创建Web元素
delete_all_cookies()：删除会话范围内的所有cookie
delete_cookie(name)：删除具有给定名称的单个cookie
execute(driver_command,params=None)：发送command执行的命令
execute_async_script(script,*args)：异步执行当前窗口或框架中的JavaScript
execute_script(script,*args)：同步执行当前窗口或框架中的JavaScript

from selenium import webdriver
driver=webdriver.Chrome()
driver.get('https://www.baidu.com')
# Run a JavaScript statement in the loaded page; this one calls alert().
driver.execute_script("alert('are you sure');")

#它基本可以实现JavaScript的所有功能 PS：但是没有测出来如何获取js执行结果（补充：在脚本中使用return即可拿到返回值，例如 driver.execute_script("return document.title;")）

fullscreen_window()：调用窗口管理器特定的全屏操作
get(url)：在当前浏览器会话中加载网页
get_cookie(name)：按名称获取单个cookie
get_cookies()：返回一组字典的cookies
get_log(log_type)：获取给定日志类型的日志
get_screenshot_as_base64()：获取当前窗口的屏幕截图，作为base64编码的字符串
get_screenshot_as_file(filename)：将当前窗口中的截屏保存为png图形
get_screenshot_as_png()：获取当前窗口的屏幕截图作为二进制数据
get_window_position(windowhandle='current')：获取当前窗口的x,y位置
get_window_rect()：获取窗口的x,y坐标以及当前窗口的高度和宽度
get_window_size()：获取当前窗口的高度和宽度
maximize_window()：最大化webdriver正在使用的当前窗口
minimize_window()：最小化webdriver正在使用的当前窗口
quit()：退出驱动程序并关闭每个关联的窗口
refresh()：刷新当前页面
save_screenshot(filename)：将当前窗口的屏幕截图保存为PNG图形文件
set_page_load_timeout(time_to_wait)：设置等待页面加载完成的时间
set_script_timeout(time_to_wait)：设置脚本在执行期间等待的时间
set_window_position(x,y,windowHandle='current')：设置当前窗口的x,y位置
set_window_rect(x=None,y=None,width=None,height=None)：设置窗口的x,y坐标以及当前窗口的高度和宽度
set_window_size(width,height,windowHandle='current')：设置当前窗口的高度和宽度
current_url：获取当前页面的URL
current_window_handle：返回当前窗口的句柄
desired_capabilities：返回驱动程序当前使用的所需功能
log_types：获取可用日志类型的列表
name：返回此实例的基础浏览器的名称
page_source：获取当前页面的源码

switch_to：返回一个包含所有可切换焦点目标（窗口、frame、alert等）的对象，用法：driver.switch_to.alert
title：返回当前页面的标题
window_handles：返回当前会话中所有窗口的句柄

from selenium import webdriver

browser=webdriver.Chrome()
browser.get('http://selenium-python.readthedocs.io')
browser.execute_script('window.open("https://www.baidu.com");')  # open the URL via window.open (in a tab)
browser.execute_script('window.open("https://www.taobao.com");')

browser.back()  # go back to the previous page
browser.set_page_load_timeout(5)
browser.forward()  # go forward to the next page
# Print a few WebDriver properties; sample output follows the snippet.
print(browser.name)
print(browser.title)
print(browser.current_url)
print(browser.current_window_handle)
print(browser.get_cookies())
print(type(browser))

#
chrome
Selenium with Python — Selenium Python Bindings 2 documentation
http://selenium-python.readthedocs.io/
CDwindow-243FD31239F20FCC0195DD522A60A0DA
[{'domain': '.readthedocs.io', 'expiry': 1530766561, 'httpOnly': False, 'name': '_gid', 'path': '/', 'secure': False, 'value': 'GA1.2.1126774326.1530680157'}, {'domain': '.readthedocs.io', 'expiry': 1593752161, 'httpOnly': False, 'name': '_ga', 'path': '/', 'secure': False, 'value': 'GA1.2.2096958532.1530680157'}, {'domain': '.readthedocs.io', 'expiry': 1530680217, 'httpOnly': False, 'name': '_gat_rtfd', 'path': '/', 'secure': False, 'value': '1'}]
<class 'selenium.webdriver.chrome.webdriver.WebDriver'>

页面截图

from selenium import webdriver

driver=webdriver.Chrome()
driver.get('http://www.python.org')
driver.save_screenshot('screenshot.png')  # save a screenshot of the page to the current directory
driver.quit()

将页面滚动到底部：

from selenium import webdriver
driver=webdriver.Chrome()
driver.get('http://www.python.org')
# window.scrollTo moves the viewport to the given position; document.body.scrollHeight
# is the full height of the body, so this scrolls to the bottom of the page.
driver.execute_script("window.scrollTo(0,document.body.scrollHeight);")

cookies操作：（PS：这段没有测试）

from selenium import webdriver
driver=webdriver.Chrome()
driver.get('https://www.baidu.com')
print(driver.get_cookies())   # list all cookies
driver.add_cookie({'name':'name','domain':'www.baidu.com','value':'germey'})   # add one cookie
print(driver.get_cookies())
# Remove every cookie, then print again to show the result.
driver.delete_all_cookies()
print(driver.get_cookies())

元素定位

class selenium.webdriver.common.by.By
有各种策略来定位页面中的元素。你可以使用最适合你的情况。Selenium提供了以下方法来定位页面中的元素：

find_element_by_id
find_element_by_name
find_element_by_xpath
find_element_by_link_text
find_element_by_partial_link_text
find_element_by_tag_name
find_element_by_class_name
find_element_by_css_selector
要查找多个元素（这些方法将返回一个列表）：

find_elements_by_name
find_elements_by_xpath
find_elements_by_link_text
find_elements_by_partial_link_text
find_elements_by_tag_name
find_elements_by_class_name
find_elements_by_css_selector
除了上面给出的公共方法之外，还有两个私有方法可能对页面对象中的定位器有用。这些是两个私有方法：find_element和find_elements

（PS：这段也没有测试，以后用到再测试）

from selenium import webdriver
from selenium.webdriver.common.by import By
driver=webdriver.Chrome()
driver.get('http://selenium-python.readthedocs.io/locating-elements.html#locating-elements')
# Locate by class name; the other By strategies are shown commented out below.
data=driver.find_element(By.CLASS_NAME,'simple')
#driver.find_element(By.ID,'IDname') # locate by element id
#driver.find_element(By.CSS_SELECTOR,'cssname')# locate by CSS selector
#driver.find_element(By.LINK_TEXT,'linktext') # locate by link text
#driver.find_element(By.PARTIAL_LINK_TEXT,'linktext') # locate by partial link text
#driver.find_element(By.NAME,'name') # locate by the name attribute
#driver.find_element(By.TAG_NAME,'tagname') # locate by tag name

print(data.text)  # print the element's text content

元素对象

from selenium import webdriver
from selenium.webdriver.chrome.options import Options
opt=Options()
# Pass the 'headless' argument to Chrome.
opt.add_argument('headless')
# NOTE(review): newer Selenium releases take `options=` instead of `chrome_options=` — confirm against the installed version.
driver=webdriver.Chrome(chrome_options=opt)
driver.get('http://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.remote.webelement')
element=driver.find_element_by_id('module-selenium.webdriver.remote.webelement')
# Print the element and its type; sample output follows the snippet.
print(element)
print(type(element))

#返回一个webelement对象
<selenium.webdriver.remote.webelement.WebElement (session="dfaee65201abdf5a931306df6e7fe421", element="0.95256057244967-1")>
<class 'selenium.webdriver.remote.webelement.WebElement'>

selenium.webdriver.remote.webelement.WebElement为一个DOM元素，它的方法和属性包括：

clear() ：清除文本元素
click() ：单击元素按钮
get_attribute(name) ：获取元素的给定属性的属性值
get_property(name) ：获取元素的给定属性
is_displayed() ：判断元素是否显示 PS：强调是是否显示,不是是否存在，原文错了

元素存在判断

def isElementExist(driver, element):
    """Report whether a CSS selector matches an element on the current page.

    Args:
        driver: Browser object exposing ``find_element_by_css_selector``.
        element: CSS selector string to look up.

    Returns:
        True if the lookup succeeds, False if it raises an ordinary exception.
    """
    try:
        driver.find_element_by_css_selector(element)
    except Exception:
        # Any lookup failure counts as "not there". Unlike a bare ``except:``,
        # this lets KeyboardInterrupt and SystemExit propagate.
        return False
    return True


# Usage: `driver` is the browser object.
if isElementExist(driver, "[class='airy-ad-prompt-container']"):
    print("有")
else:
    print("没有")

is_enabled() ：判断元素是否被启用
is_selected() ：返回元素是否被选中
screenshot(filename) ：将当前元素的屏幕截图保存到文件
send_keys() #发送元素值
submit() :提交表单
value_of_css_property() ：CSS属性的值
id ：selenium使用的内部ID
location :元素在可渲染画布中的位置
location_once_scrolled_into_view ：发现元素在屏幕视图中的位置
rect ：返回包含元素大小和位置的字典
screenshot_as_base64 ：获取当前元素的截屏，作为base64编码的字符串
size :获取元素的大小
tag_name :获取元素的tagName属性
text ：获取元素的文本

抓取网页源码，有了它剩下的就是正则表达式捕获了

from selenium import webdriver

driver=webdriver.Chrome()
driver.get('http://www.cnblogs.com/zhangxinqi/')
element=driver.find_element_by_id('q')  # locate the search input box
element.send_keys('python3之requests')  # type the search text into it
button=driver.find_element_by_id('btnZzk')  # locate the search button
button.click()  # trigger the search
data=driver.page_source   # HTML source of the current page

print(driver.current_url)   # print the URL
print(data)
print(type(element))
# quit() shuts the driver down and closes every associated window;
# close() would only close the current window.
driver.quit()

动作模拟

class selenium.webdriver.common.action_chains.ActionChains(driver)

在上面的实例中我们针对的是某个节点元素的操作，如果要对没有特定元素的对象操作如鼠标拖拽、键盘按键等，这些动作就称为动作链，selenium使用ActionChains()类来实现鼠标移动，鼠标按钮操作，按键操作和上下文菜单交互，悬停和拖放等

click(on_element=None) ——单击鼠标左键
click_and_hold(on_element=None) ——点击鼠标左键，不松开
context_click(on_element=None) ——点击鼠标右键
double_click(on_element=None) ——双击鼠标左键
drag_and_drop(source, target) ——拖拽到某个元素然后松开
drag_and_drop_by_offset(source, xoffset, yoffset) ——拖拽到某个坐标然后松开
key_down(value, element=None) ——按下某个键盘上的键
key_up(value, element=None) ——松开某个键
move_by_offset(xoffset, yoffset) ——鼠标从当前位置移动到某个坐标
move_to_element(to_element) ——鼠标移动到某个元素
move_to_element_with_offset(to_element, xoffset, yoffset) ——移动到距某个元素（左上角坐标）多少距离的位置
perform() ——执行链中的所有动作
release(on_element=None) ——在某个元素位置松开鼠标左键
send_keys(*keys_to_send) ——发送某个键到当前焦点的元素
send_keys_to_element(element, *keys_to_send) ——发送某个键到指定元素

拖拽到指定目标（PS：没有测试）

# Look up the element to drag and the element to drop it on, both by name.
element = driver.find_element_by_name("source")
target = driver.find_element_by_name("target")

from selenium.webdriver import ActionChains
action_chains = ActionChains(driver)
# Queue the drag-and-drop, then perform() executes the queued actions.
action_chains.drag_and_drop(element, target).perform()

鼠标操作（PS：没有测试）

menu = driver.find_element_by_css_selector(".nav") # element to move the mouse over
hidden_submenu = driver.find_element_by_css_selector(".nav #submenu1")  # element to click
# Create the action chain
actions = ActionChains(driver)
# Move the mouse onto the menu element
actions.move_to_element(menu)
# Click the submenu element
actions.click(hidden_submenu)
# Execute the queued actions
actions.perform()

弹出对话框

class selenium.webdriver.common.alert.Alert(driver)

Alert内置支持处理弹窗对话框，方法：

accept() ：确认弹窗，用法：Alert(driver).accept()
authenticate(username,password) ：将用户名和密码发送到authenticated对话框，隐含点击确定，用法：driver.switch_to.alert.authenticate('username','password')
dismiss() ：取消确认
send_keys(keysToSend) ：向弹窗发送按键（文本），keysToSend为要发送的文本
text ：获取Alert的文本

（PS：没有测试）

import time
from selenium import webdriver
from selenium.webdriver.common.alert import Alert
driver=webdriver.Chrome()
driver.get('https://www.baidu.com')
driver.execute_script("alert('确定');")  # pop up an alert dialog
time.sleep(2)
print(driver.switch_to.alert.text)  # text shown in the alert
# Click OK on the dialog. The call is made for its effect only, so its
# result is not bound to a name that would look like an Alert object.
Alert(driver).accept()

键盘操作

class selenium.webdriver.common.keys.Keys

selenium提供一个keys包来模拟所有的按键操作，下面我们介绍下一些常用的按键操作：

回车键：Keys.ENTER
退格键：Keys.BACK_SPACE
空格键：Keys.SPACE
制表键：Keys.TAB
退出键（Esc）：Keys.ESCAPE
刷新键：Keys.F5
全选（ctrl+A）：send_keys(Keys.CONTROL,'a') #组合键需要用send_keys方法操作
复制（ctrl+C）：send_keys(Keys.CONTROL,'c')
剪切（ctrl+X）：send_keys(Keys.CONTROL,'x')
粘贴（ctrl+V）：send_keys(Keys.CONTROL,'v')

实现点击页面从python的pypi页面下载selenium源码包：（PS：没有测试）

import requests
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.common.action_chains import ActionChains
driver=webdriver.Chrome()
driver.get('https://pypi.org/')

element=driver.find_element_by_id('search')  # locate the search box
element.send_keys('selenium')  # search for the selenium package
element.send_keys(Keys.ENTER)  # press Enter

element_a=driver.find_element_by_link_text('selenium')  # link to the selenium package

ActionChains(driver).move_to_element(element_a).click(element_a).perform()  # left-click the link

element_down=driver.find_element_by_link_text('Download files')  # link to the downloads page
ActionChains(driver).move_to_element(element_down).click(element_down).perform()  # left-click the link

element_selenium=driver.find_element_by_link_text('selenium-3.13.0.tar.gz')  # link to the source archive
data=element_selenium.get_attribute('href')   # URL of the archive
# Download before opening the output file, so a failed request does not
# leave an empty archive on disk.
source=requests.get(data).content   # archive bytes
with open('selenium-3.13.0.tar.gz','wb') as f:
    f.write(source)  # the with-block closes the file; no explicit close() needed

driver.quit()

延时等待

目前，大多数Web应用程序都在使用AJAX技术。当浏览器加载页面时，该页面中的元素可能以不同的时间间隔加载。这使定位元素变得困难：如果DOM中尚未存在元素，则locate函数将引发ElementNotVisibleException异常。使用等待，我们可以解决这个问题。等待在执行的操作之间提供了一些松弛 - 主要是使用元素定位元素或任何其他操作。

Selenium Webdriver提供两种类型的等待 - 隐式和显式。显式等待使WebDriver等待某个条件发生，然后再继续执行。在尝试查找元素时，隐式等待会使WebDriver轮询DOM一段时间。

显式等待：

显式等待是你自己编写的一段代码，用于在继续执行后续代码之前等待某个条件发生。Selenium提供了一些便捷方法，让你只在需要的地方等待所需的时间；实现时需要WebDriverWait与ExpectedCondition结合使用：

from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Firefox()
driver.get("http://somedomain/url_that_delays_loading")
try:
    # Wait, with a timeout of 10, until an element with this ID is present.
    element = WebDriverWait(driver, 10).until(
        EC.presence_of_element_located((By.ID, "myDynamicElement"))
    )
finally:
    # Shut the browser down whether or not the wait succeeded.
    driver.quit()

在抛出TimeoutException异常之前将等待10秒或者在10秒内发现了查找的元素。 WebDriverWait 默认情况下会每500毫秒调用一次ExpectedCondition直到结果成功返回。 ExpectedCondition成功的返回结果是一个布尔类型的true或是不为null的返回值。中文文档 https://selenium-python-zh.readthedocs.io/en/latest/getting-started.html#id2

其他等待条件：

title_is ：标题是某内容
title_contains ：标题包含某内容
presence_of_element_located ：节点加载出来，传入定位元组，如(By.ID, 'p')
visibility_of_element_located ：节点可见，传入定位元组
visibility_of ：可见，传入节点对象
presence_of_all_elements_located ：所有节点加载出来
text_to_be_present_in_element ：某个节点文本包含某文字
text_to_be_present_in_element_value ：某个节点值包含某文字
frame_to_be_available_and_switch_to_it ：加载并切换
invisibility_of_element_located ：节点不可见
element_to_be_clickable ：节点可点击
staleness_of ：判断一个节点是否仍在DOM，可判断页面是否已经刷新
element_to_be_selected ：节点可选择，传节点对象
element_located_to_be_selected ：节点可选择，传入定位元组
element_selection_state_to_be ：传入节点对象以及状态，相等返回True，否则返回False
element_located_selection_state_to_be ：传入定位元组以及状态，相等返回True，否则返回False
alert_is_present ：是否出现警告

中文文档 https://selenium-python-zh.readthedocs.io/en/latest/waits.html

摘自 https://www.cnblogs.com/zhangxinqi/p/9259808.html

最后附上一份截图源码，改了好一会才完成，原始版，未整理

#!/usr/bin/python3
# 截图测试
#
from selenium import webdriver
import unittest
import os, sys, time
import exescript
from PIL import Image


def image_merge(li_path, img_width, img_height):
    """Stack screenshots vertically, save the result, then crop it and save again.

    Paths in *li_path* that do not exist are skipped. The stitched image is
    written to ``data/xq.jpg`` and the cropped copy to ``data/xq_cut.jpg``.

    Args:
        li_path: Screenshot file paths, pasted top to bottom in this order.
        img_width: Width of the content to keep; used to work out how much is
            trimmed from the left and right.
        img_height: Lower edge of the crop box.
    """
    print("图片处理")
    # Filter once so both passes below work on the same set of files.
    existing = [path for path in li_path if os.path.exists(path)]

    # Canvas size: as wide as the widest image, as tall as all of them stacked.
    max_width = 0
    total_height = 0
    for img_path in existing:
        # The context manager closes the file handle, which the original leaked.
        with Image.open(img_path) as img:
            width, height = img.size
        max_width = max(max_width, width)
        total_height += height

    # NOTE(review): an integer fill of 255 in RGB mode may not be white —
    # confirm the intended background colour.
    new_img = Image.new("RGB", (max_width, total_height), 255)

    # Paste each screenshot below the previous one.
    y = 0
    for img_path in existing:
        with Image.open(img_path) as img:
            new_img.paste(img, (0, y))
            y += img.size[1]

    print(new_img.size)
    new_img.save("data/xq.jpg", "JPEG", quality=95)

    # Re-open the stitched file and crop away the side margins.
    with Image.open("data/xq.jpg") as img1:
        print("宽度", img1.width)
        print("图片宽度", img_width)
        # Half of the width not taken by the content, minus 100.
        cut = ((img1.width - img_width) / 2) - 100
        print("白边", cut)
        ok_scr = img1.width - cut
        img2 = img1.crop((cut, 0, ok_scr, img_height))
        # Save while the source image is still open.
        img2.save("data/xq_cut.jpg", "JPEG", quality=95)


# Timestamps for naming output. Both strings come from one clock reading,
# so the date part always matches even if the script starts at midnight.
_now = time.localtime()
current_time = time.strftime("%Y-%m-%d-%H_%M_%S", _now)
current_time1 = time.strftime("%Y-%m-%d", _now)
print(current_time)
print(current_time1)

# Path to the chromedriver binary. Raw string: "\c" is not a valid escape,
# so the plain literal only worked by accident and triggers a warning.
dpath = r"config\chromedriver.exe"
options = webdriver.ChromeOptions()
options.add_argument(
    'user-agent="Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36"'
)
# Run the browser without a visible window.
options.add_argument("--headless")
# Browser language. The original passed a bare "en-us", which is not a
# switch; use 'lang=zh_CN.UTF-8' instead for Chinese.
options.add_argument("--lang=en-US")
# Google's documentation mentions this flag as a workaround for a bug.
options.add_argument("--disable-gpu")
# Do not load plugins, which can speed things up. The original used an
# en dash ("–") here instead of "--", so the switch was not recognised.
options.add_argument("--disable-plugins")
# With the driver on PATH the first argument can be omitted; otherwise pass its path.
driver = webdriver.Chrome(executable_path=dpath, chrome_options=options)
url = "https://www.amazon.com/dp/B01L1F1OV6"
# Window width and height
w_width = 1920
w_height = 1080
driver.set_window_size(w_width, w_height)

driver.get(url)

# 必须打印图片路径HTMLTestRunner才能捕获并且生成路径，\image\**\\**.png 是获取路径的条件,必须这样的目录
# 设置存储图片路径，测试结果图片可以按照每天进行区分
# 通过if进行断言判断
# driver.get("https://baidu.com/")
# # 新创建路径“.”表示当前整个.py文件的路径所在的位置，“\\”路径分割符，其中的一个是“\”表示转义字符
# pic_path = ".\\result\\image\\" + current_time1 + "\\" + current_time + ".png"
# print(pic_path)
# time.sleep(5)
# print(driver.title)
# 截取当前url页面的图片，并将截取的图片保存在指定的路径下面（pic_path），注：以下两种方法都可以
# driver.save_screenshot(pic_path)

# A script can be injected, but it does not necessarily return a value
time.sleep(5)
# exejs = exescript.ExeJs(driver)
# js_th ='return document.getElementsByClassName("a-section a-spacing-extra-large bucket")[0].clientHeight.toString()'
# exejs.exeWrap(js_th)
# Read the page title back through JavaScript and print it.
th = driver.execute_script("return document.title;")
print(th)
# Scroll to the target position
print("滚动到指定位置")
# NOTE(review): presumably jumps to the element whose id is "aplus" — confirm.
js = 'window.location.hash="aplus"'
driver.execute_script(js)
# Compound class names cannot be used like this
# ts1 = driver.find_element_by_class_name("a-section.a-spacing-extra-large.bucket")
li_img = []  # paths of the screenshots taken, in top-to-bottom order
try:
    # y coordinate of the product-description container; scrolling is
    # measured from here.
    dw = driver.find_element_by_id("dpx-aplus-3p-product-description_feature_div")
    dw1_height = dw.location["y"]

    # Size of the whole detail section.
    xq = driver.find_element_by_css_selector(
        "[class='a-section a-spacing-extra-large bucket']"
    )
    xq_width = xq.size["width"]
    xq_height = xq.size["height"]
    # Height of the section not yet covered by a screenshot taken in the loop.
    all_height = xq_height
    driver.save_screenshot("data/sc_1.png")
    li_img.append("data/sc_1.png")
    # Height of the browser's visible area.
    th_see_height = driver.execute_script("return window.innerHeight;")
    print("可视高度", th_see_height)
    # Width of the detail content itself. The label used to say "height"
    # although the value printed is a width.
    xq_ok = driver.find_element_by_css_selector("[class='aplus-v2 desktop celwidget']")
    xq_ok_width = xq_ok.size["width"]
    print("展示宽度", xq_ok_width)

    # If the section is taller than the visible area, scroll and take more
    # screenshots. `num` is the index of the next screenshot.
    num = 2
    while all_height >= th_see_height:
        print("滚动到指定位置")
        # Move one more viewport down for each screenshot. The original
        # always scrolled to the same offset, so later shots repeated.
        th = dw1_height + (num - 1) * th_see_height
        js = "window.scroll(0," + str(th) + ")"
        driver.execute_script(js)
        t_path = "data/sc_" + str(num) + ".png"
        driver.save_screenshot(t_path)
        li_img.append(t_path)
        all_height = all_height - th_see_height
        if all_height <= th_see_height:
            break
        num += 1

    # Count the screenshots actually taken; `num` is off by one when the
    # loop never runs.
    print("截图次数", len(li_img))
    print("截图完成,数据如下")
    print("路径", li_img)
    print("尺寸宽", xq_width)
    print("尺寸高", xq_height)
    image_merge(li_img, xq_ok_width, xq_height)
except Exception as result:
    # Top-level boundary: report the failure and carry on to the teardown.
    print("检测出异常{}".format(result))

time.sleep(1)
# quit() shuts the driver down and closes every associated window;
# close() would only close the current window.
driver.quit()

转载于:https://my.oschina.net/qingqingdego/blog/3019734