selenium使用

最新推荐文章于 2024-09-05 20:03:41 发布

努力减肥的钱学生

最新推荐文章于 2024-09-05 20:03:41 发布

阅读量1.1k

点赞数 20

文章标签： selenium 测试工具

本文链接：https://blog.csdn.net/weixin_39504722/article/details/135324550

版权

1.最大化窗口，设置窗口大小，前后左右移动

from selenium import webdriver
from time import sleep
from selenium.webdriver.chrome.service import Service

def test_max():
    """Open the Baidu home page, maximize the browser window, then quit.

    The window is left maximized for three seconds so the effect can be
    observed before the browser is closed.
    """
    home_page = 'http://www.baidu.com/'
    browser = webdriver.Chrome()
    browser.get(home_page)
    browser.maximize_window()
    # Pause so the maximized window stays visible for a moment.
    sleep(3)
    browser.quit()
def test_size():
    """Open the Baidu home page in an 800x600 window, then quit.

    The resized window is kept open for three seconds before the browser
    is closed.
    """
    width, height = 800, 600
    browser = webdriver.Chrome()
    browser.get('http://www.baidu.com/')
    browser.set_window_size(width, height)
    # Pause so the resized window stays visible for a moment.
    sleep(3)
    browser.quit()
def move():
    """Visit two Baidu pages, then walk back and forward through history.

    Each step is followed by a three-second pause so the navigation can be
    observed; the browser is closed at the end.
    """
    browser = webdriver.Chrome()
    # Load the two pages in order, pausing after each one.
    for page in ('http://www.baidu.com/', 'http://news.baidu.com/'):
        browser.get(page)
        sleep(3)
    # Replay the history: first one step back, then one step forward.
    for navigate in (browser.back, browser.forward):
        navigate()
        sleep(3)
    browser.quit()
    
# Script entry point: only the history-navigation demo is enabled; the two
# window demos are left commented out.
if __name__ == '__main__':
    # test_max()
    # test_size()
    move()

2.定位元素，下拉菜单

对象的定位应该是自动化的核心，要想操作一个对象，首先应该识别这个对象。一个对象就像一个人一样，他会有各种的特征（属性），比如我们可以通过一个人的身份证号、姓名，或者他住在哪个街道、楼层、门牌找到这个人。

对象定位

webdriver提供了对象定位方法

find_element(type,value)
find_elements(type,value)

利用 By 类来确定哪种选择方式

from selenium import webdriver
from time import sleep
from selenium.webdriver.chrome.service import Service
from selenium.webdriver.common.by import By
import os
def test_find():
    """Open the local menu demo page, click ``Link1`` and hover over ``Action``.

    The page is loaded from ``./pachogn/menu.html`` relative to the current
    working directory. The browser is closed even when an element cannot be
    located, so a failed lookup does not leave a browser process behind.
    """
    # Imported locally so the function does not rely on a file-level import.
    from pathlib import Path

    # as_uri() builds a well-formed, percent-encoded file:// URL, whereas
    # 'file:///' + abspath yields four slashes on POSIX and backslashes on
    # Windows.
    page_url = Path('./pachogn/menu.html').resolve().as_uri()
    chrome = webdriver.Chrome()
    try:
        chrome.get(page_url)
        # Locate the parent-level link and click it.
        chrome.find_element(By.LINK_TEXT, 'Link1').click()
        sleep(1)
        # Locate the entry to hover over and move the mouse pointer onto it.
        menu = chrome.find_element(By.LINK_TEXT, 'Action')
        webdriver.ActionChains(chrome).move_to_element(menu).perform()
        sleep(3)
    finally:
        chrome.quit()
    
# Script entry point: run the element-location demo.
if __name__ == '__main__':
    test_find()

By 类的一些属性如下

ID = "id"
NAME = "name"
XPATH = "xpath"
LINK_TEXT = "link text"
PARTIAL_LINK_TEXT = "partial link text"
TAG_NAME = "tag name"
CLASS_NAME = "class name"
CSS_SELECTOR = "css selector"

操作元素

前面讲到的不少知识都是定位元素，定位只是第一步，定位之后需要对这个元素进行操作。

是鼠标点击还是键盘输入，这要取决于我们定位的是按钮还是输入框。

一般来说，webdriver中比较常用的操作对象的方法有下面几个

click 点击对象
send_keys 在对象上模拟按键输入
clear 清除对象的内容，如果可以的话

3.窗口定位

对于一个现代的web应用，经常会出现框架（frame）或窗口（window）的应用，这也就给我们的定位带来了一个难题。

有时候我们定位一个元素，定位器没有问题，但一直定位不了，这时候就要检查这个元素是否在一个frame中。selenium webdriver 提供了 switch_to.frame() 方法（旧版本中的 switch_to_frame 已被移除），可以很轻松地解决这个问题

多层框架或窗口的定位：

driver.switch_to.frame()

from selenium.webdriver.chrome.service import Service
from selenium import webdriver
from time import sleep
from selenium.webdriver.common.by import By
import os
def select_frame():
    """Type a query into a search box that lives inside nested frames.

    The page is loaded from ``./pachogn/frame.html`` relative to the current
    working directory. The browser is closed even when a frame or element
    cannot be found, so a failed lookup does not leave a browser process
    behind.
    """
    # Imported locally so the function does not rely on a file-level import.
    from pathlib import Path

    # as_uri() builds a well-formed, percent-encoded file:// URL, whereas
    # 'file:///' + abspath yields four slashes on POSIX and backslashes on
    # Windows.
    page_url = Path('./pachogn/frame.html').resolve().as_uri()
    chrome = webdriver.Chrome()
    try:
        chrome.get(page_url)
        # Enter frame 'f1' first, then 'f2' from within it; element lookups
        # only see the document of the currently selected frame.
        chrome.switch_to.frame('f1')
        chrome.switch_to.frame('f2')
        chrome.find_element(By.ID, 'sb_form_q').send_keys('百战')
        chrome.find_element(By.ID, 'search_icon').click()
        sleep(3)
    finally:
        chrome.quit()

# Script entry point: run the nested-frame demo.
if __name__ == '__main__':
    select_frame()

4.处理弹出框

有时，页面可能要弹窗口。只需要去定位弹窗上的“确定”按钮即可

switch_to

焦点集中到页面上的一个警告（提示）
accept()

接受警告提示

from selenium.webdriver.chrome.service import Service
from selenium import webdriver
from time import sleep
from selenium.webdriver.common.by import By
import os
def select_frame():
    """Open the local pop-up demo page and accept the alert it shows.

    Despite its name, this demo does not switch frames: it switches to the
    page's alert and accepts it. The page is loaded from
    ``./pachogn/window.html`` relative to the current working directory.
    The browser is closed even when no alert is present.
    """
    # Imported locally so the function does not rely on a file-level import.
    from pathlib import Path

    # as_uri() builds a well-formed, percent-encoded file:// URL, whereas
    # 'file:///' + abspath yields four slashes on POSIX and backslashes on
    # Windows.
    page_url = Path('./pachogn/window.html').resolve().as_uri()
    chrome = webdriver.Chrome()
    try:
        chrome.get(page_url)
        sleep(3)
        # Move focus to the alert and confirm it.
        chrome.switch_to.alert.accept()
        sleep(3)
    finally:
        chrome.quit()

# Script entry point: run the alert-handling demo.
if __name__ == '__main__':
    select_frame()

5.拖拽元素

要完成元素的拖拽，首先需要指定被拖动的元素和拖动目标元素，然后利用 ActionChains 类来实现，ActionChains用于定制动作。通过ActionChains对象中的perform()执行动作

from selenium.webdriver.chrome.service import Service
from selenium import webdriver
from time import sleep
from selenium.webdriver.common.by import By
from selenium.webdriver import ActionChains
import os
def select_frame():
    """Drag elements around on the local drag-and-drop demo page.

    Despite its name, this demo does not switch frames. It drags the element
    ``draggable`` onto ``draggable2`` and then moves ``draggable3`` by an
    offset of (10, 10) ten times, once per second. The page is loaded from
    ``./pachogn/move.html`` relative to the current working directory. The
    browser is closed even when an element cannot be located.
    """
    # Imported locally so the function does not rely on a file-level import.
    from pathlib import Path

    # as_uri() builds a well-formed, percent-encoded file:// URL, whereas
    # 'file:///' + abspath yields four slashes on POSIX and backslashes on
    # Windows.
    page_url = Path('./pachogn/move.html').resolve().as_uri()
    chrome = webdriver.Chrome()
    try:
        chrome.get(page_url)
        sleep(3)
        div1 = chrome.find_element(By.ID, 'draggable')
        div2 = chrome.find_element(By.ID, 'draggable2')
        div3 = chrome.find_element(By.ID, 'draggable3')
        # Drag the first element and drop it onto the second one.
        ActionChains(chrome).drag_and_drop(div1, div2).perform()
        sleep(3)
        # Build the offset drag once and perform the same chain repeatedly.
        ac2 = ActionChains(chrome).drag_and_drop_by_offset(div3, 10, 10)
        for _ in range(10):
            ac2.perform()
            sleep(1)
        sleep(3)
    finally:
        chrome.quit()

# Script entry point: run the drag-and-drop demo.
if __name__ == '__main__':
    select_frame()

6.调用JS

有时候我们需要控制页面上的滚动条，但滚动条并非页面上的元素，这个时候就需要借助js来进行操作

一般用到操作滚动条的会有两个场景：

要操作的页面元素不在当前页面范围，无法进行操作，需要拖动滚动条
注册时的法律条文需要阅读，判断用户是否阅读的标准是：滚动条是否拉到最下方

调用js的方法

execute_script(script, *args)

滚动条回到顶部：

# Set scrollTop of the element whose id is 'id' to 0, then run the script in
# the page (assumes `driver` is an existing WebDriver instance).
js="document.getElementById('id').scrollTop=0"
driver.execute_script(js)

滚动条拉到底部

# Set the document element's scrollTop to 10000 and run the script in the
# page (assumes `driver` is an existing WebDriver instance).
js="document.documentElement.scrollTop=10000"
driver.execute_script(js)

可以修改scrollTop 的值，来定位右侧滚动条的位置，0是最上面；10000只是一个足够大的值，页面高度不超过它时即可滚动到最底部

以上方法在Firefox和IE浏览器上是可以的，但是用Chrome浏览器，发现不管用。Chrome浏览器解决办法：

# Set scrollTop of the <body> element to 0 and run the script in the page
# (assumes `driver` is an existing WebDriver instance).
js = "document.body.scrollTop=0"
driver.execute_script(js)

横向滚动条

# Scroll the window to x=100, y=400 and run the script in the page
# (assumes `driver` is an existing WebDriver instance).
js = "window.scrollTo(100,400)"
driver.execute_script(js)

from selenium.webdriver.chrome.service import Service
from selenium import webdriver
from time import sleep
from lxml import etree


def test_scroll():
    """Scroll a JD search-result page down and print the prices found on it.

    The page source is parsed with lxml after scrolling; the list of price
    texts and its length are printed before the browser is closed.
    """
    browser = webdriver.Chrome(service=Service('./chromedriver.exe'))
    browser.get("https://search.jd.com/Search?keyword=%E6%89%8B%E6%9C%BA&enc=utf-8&suggest=1.def.0.SAK7|MIXTAG_SAK7R,SAK7_M_AM_L5385,SAK7_M_COL_R,SAK7_S_AM_R,SAK7_SC_PD_R,SAK7_SM_PB_R,SAK7_SS_PM_R,tsabtest_base64_U2VhcmNobGlzdF80MzkyfGJhc2U_tsabtest|&wq=shouji&pvid=24340a2def0e4e0cb510af07aa32c89d")
    # Pull the scroll bar down by setting a very large scrollTop value.
    browser.execute_script('document.documentElement.scrollTop=100000')
    # Short pause before the page source is read.
    sleep(1)
    # Parse the current page source and extract the price texts.
    tree = etree.HTML(browser.page_source)
    price_texts = tree.xpath('//ul[@class="gl-warp clearfix"]/li/div/div/strong/i/text()')
    print(price_texts)
    print(len(price_texts))
    sleep(3)
    browser.quit()


# Script entry point: run the scroll-and-scrape demo.
if __name__ =='__main__':
  test_scroll()

7.等待元素

1.强制等待

使用 time.sleep

作用：当代码运行到强制等待这一行的时候，无论出于什么原因，都强制等待指定的时间，需要通过time模块实现

优点：简单

缺点：无法做有效的判断，会浪费时间

隐式等待

chrome.implicitly_wait(time_num)

到了一定的时间发现元素还没有加载，则继续等待我们指定的时间，如果超过了我们指定的时间还没有加载就会抛出异常，如果没有需要等待的时候就已经加载完毕就会立即执行

优点：设置一次即可

缺点：必须等待加载完成才能到后续的操作，或者等待超时才能进入后续的操作

from selenium import webdriver
from selenium.webdriver.common.by import By

url = 'https://www.baidu.com/'
driver = webdriver.Chrome()
try:
    driver.get(url)
    # Implicit wait: element lookups made after this call keep retrying for
    # up to 10 seconds before they fail.
    driver.implicitly_wait(10)
    # find_element_by_class_name no longer exists in current Selenium 4
    # releases; the By-based find_element call is the supported form.
    print(driver.find_element(By.CLASS_NAME, 'next'))
    print(driver.page_source)
finally:
    # Close the browser even when the lookup above times out.
    driver.quit()

显式等待

from selenium.webdriver.support.wait import WebDriverWait

指定一个等待条件，并且指定一个最长等待时间，会在这个时间内进行判断是否满足等待条件，如果成立就会立即返回，如果不成立，就会一直等待，直到等待你指定的最长等待时间，如果还是不满足，就会抛出异常，如果满足了就会正常返回

优点：专门用于对指定一个元素等待，加载完即可运行后续代码

缺点：多个元素都需要单独设置等待

# Imports this snippet needs to run on its own; `EC` in particular was used
# without ever being imported.
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

url = 'https://www.guazi.com/nj/buy/'
driver = webdriver.Chrome()
try:
    driver.get(url)
    # Explicit wait: poll every 0.5 s, for at most 10 s, until an element with
    # class "next" is present in the DOM; a timeout raises an exception.
    wait = WebDriverWait(driver, 10, 0.5)
    wait.until(EC.presence_of_element_located((By.CLASS_NAME, 'next')))
    print(driver.page_source)
finally:
    # Close the browser even when the wait times out.
    driver.quit()

8.参数使用

1.无头浏览器

def test_headless():
    """Load the Baidu home page in headless Chrome and print its HTML.

    The browser is started with the ``--headless`` argument and the driver
    binary at ``./chromedriver``.
    """
    # Pass --headless so Chrome starts in headless mode.
    chrome_flags = ChromeOptions()
    chrome_flags.add_argument('--headless')
    browser = Chrome(service=Service('./chromedriver'), options=chrome_flags)
    browser.get('https://www.baidu.com')
    print(browser.page_source)
    browser.quit()

2.代理模式

def test_proxy1():
    """Start Chrome behind an HTTP proxy and print httpbin's echo response.

    The proxy is configured with Chrome's ``--proxy-server`` argument, whose
    value has the form ``http://ip:port``.
    """
    chrome_flags = ChromeOptions()
    chrome_flags.add_argument('--proxy-server=http://221.199.36.122:35414')
    browser = Chrome(service=Service('./chromedriver'), options=chrome_flags)
    # Load httpbin's /get page and print the returned page source.
    browser.get('http://httpbin.org/get')
    print(browser.page_source)
    browser.quit()


def test_proxy2():
    """Start Chrome behind a manually configured proxy and print httpbin's echo.

    The proxy is described with a ``Proxy`` object and attached through
    ``ChromeOptions.proxy``. The earlier approach used ``DesiredCapabilities``,
    which this file never imports, modified the shared
    ``DesiredCapabilities.CHROME`` dict in place, and relied on the deprecated
    ``desired_capabilities`` keyword. The browser is closed even if loading
    the page fails.
    """
    # Imported locally, like the proxy classes, so the function is
    # self-contained.
    from selenium.webdriver import ChromeOptions
    from selenium.webdriver.common.proxy import ProxyType, Proxy

    # NOTE(review): the proxy value includes an "http://" scheme prefix;
    # confirm whether the driver expects a bare host:port here.
    ip = 'http://113.76.133.238:35680'
    proxy = Proxy()
    proxy.proxy_type = ProxyType.MANUAL
    proxy.http_proxy = ip
    proxy.ssl_proxy = ip

    # Attach the proxy to this browser's own options object instead of
    # mutating a shared capabilities dict.
    options = ChromeOptions()
    options.proxy = proxy

    service = Service('./chromedriver')
    driver = Chrome(service=service, options=options)
    try:
        # Load httpbin's /get page and print the returned page source.
        driver.get('http://httpbin.org/get')
        print(driver.page_source)
    finally:
        driver.quit()

3.防检测模式

from selenium.webdriver import Chrome
from selenium.webdriver import ChromeOptions


# Start Chrome without the automation switch and automation extension.
options = ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-automation'])
options.add_experimental_option('useAutomationExtension', False)

# Script registered to run on every new document: it redefines
# navigator.webdriver so that reading it returns false.
hide_webdriver_flag = {
  "source": """
     Object.defineProperty(navigator, 'webdriver', {
     get: () => false
     })
   """
}

# `options=` is the supported keyword; `chrome_options=` is the deprecated
# spelling and is not accepted by recent Selenium 4 releases.
chrome = Chrome(options=options)
try:
    chrome.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", hide_webdriver_flag)

    chrome.get('http://httpbin.org/get')
    info = chrome.page_source

    print(info)
    # Keep the window open for a while so the page can be inspected.
    sleep(20)
finally:
    # Close the browser so no browser/driver process is left behind.
    chrome.quit()

9.实战

from selenium.webdriver.chrome.service import Service
from selenium.webdriver import Chrome
from selenium.webdriver.common.by import By


from lxml import etree


def spider_huya():
    """Page through huya.com's LoL listing until no next-page link is left.

    On every page the streamer names and viewer counts are extracted from the
    page source; they are not printed or returned. The loop stops once the
    page source no longer contains ``laypage_next``, then the browser is
    closed.
    """
    browser = Chrome(service=Service('./chromedriver.exe'))
    # Implicit wait of 5 seconds for element lookups.
    browser.implicitly_wait(5)
    browser.get('https://www.huya.com/g/lol')
    while True:
        # Extract the data of the current page.
        tree = etree.HTML(browser.page_source)
        names = tree.xpath('//i[@class="nick"]/@title')
        person_nums = tree.xpath('//i[@class="js-num"]/text()')

        # Stop when the page offers no "next page" control. Checking the page
        # source is used here instead of catching the exception a failed
        # lookup would raise.
        if 'laypage_next' not in browser.page_source:
            break
        browser.find_element(By.XPATH, '//a[@class="laypage_next"]').click()

    browser.quit()




# Script entry point: run the Huya listing crawler.
if __name__ == '__main__':
  spider_huya()