风火编程--selenium和webdriver等待/点击/frame/window切换窗口最大化/滚动/js定位

风火编程

已于 2023-11-17 19:55:28 修改

阅读量1.1k

点赞数 2

分类专栏： python应用文章标签： selenium 模拟浏览器 webdriver 动态加载等待

于 2018-11-05 11:21:52 首次发布

本文链接：https://blog.csdn.net/weixin_42620314/article/details/82960165

版权

python应用专栏收录该内容

20 篇文章

订阅专栏

chromedriver的一些初始化设置

import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
    chrome_options = Options()
    # 设置无头
    chrome_options.add_argument('--headless')
    # 设置不加载图片
    prefs = {"profile.managed_default_content_settings.images": 2}
    chrome_options.add_experimental_option("prefs", prefs)
    driver = webdriver.Chrome(
    chrome_options=chrome_options, executable_path=r'D:\software\python36\chromedriver.exe')
    # 设置页面加载超时
    driver.set_page_load_timeout(20) 
    # 设置页面异步js执行超时
   driver.set_script_timeout(10)

等待

显式等待

   from selenium.webdriver.support.wait import WebDriverWait
   from selenium.webdriver.support import expected_conditions as EC

wait = WebDriverWait(driver, max_wait)
wait.until(EC.element_to_be_clickable((By.ID, 'sendPhoneCode'))).click()

# 其他判断加载的方法

# *****常用: 判断元素已加载到DOM, 传入locator
presence_of_element_located
 # *****常用: 判断某个元素是否可见并且可点击, 传入locator
element_to_be_clickable

   # 判断某个元素是否不可见或不存在于DOM, 传入locator
invisibility_of_element_located
 # 判断该frame是否可以switch进去，如果是返回True并且switch进去，否则返回False
frame_to_be_available_and_switch_to_it
    # 判断某个元素中的text是否 包含 了预期的字符串
text_to_be_present_in_element
# 判断某个元素中的value属性是否包含了预期的字符串
text_to_be_present_in_element_value
# 判断某个元素是否已从dom树中移除(元素已失效), 传入element, 返回True或False
staleness_of
# 判断某个元素是否被选中了,一般用在下拉列表
element_to_be_selected
# 判断元素是否可见
visibility_of
# 判断标题等于
title_is
# 标题包含
title_contains
# 判断页面上是否存在alert
alert_is_present
# 确认弹出框
driver.switch_to.alert.accept()

隐式等待

driver.implicitly_wait(seconds)

强制等待

time.sleep(seconds)

新标签中打开url, 并切换到该标签

js = 'window.open("{}");'.format(dl_url)
driver.execute_script(js)
driver.switch_to.window(driver.window_handles[1])

窗口最大化

最大化方式打开

chrome_options = Options()
chrome_options.add_argument('--start-maximized')

已开窗口最大化

driver.maximize_window()

窗口切换

driver.switch_to.window(driver.window_handles[-1])

iframe的处理

sleep(2)
iframe = driver.find_element_by_xpath('//iframe[@class="iframe"]')
 driver.switch_to.frame(iframe)
 driver.switch_to.default_content()
 load = driver.find_element_by_xpath('//a[@id="WkDialogOk"]')
 load.click()
 driver.close()

滚动加载

js滚动1

time.sleep(2)
js = "var q=document.documentElement.scrollTop=1000"
# 或者
js = "window.scrollTo(0, document.body.scrollHeight);"
driver.execute_script(js)

js滚动2

target = browser.find_element_by_class_name("loadmore disabled")
if target.xpath('./text()').extract_first() != "没有更多了":
# 拖动到可见的元素去
    js = "arguments[0].scrollIntoView();"
    driver.execute_script(js, target)
    time.sleep(1)

js滚动3

    js = "window.scrollBy(0,{})"
    driver.execute_script(js.format(random.uniform(0, 5000)))

pagedown滚动

try:
    for i in range(25):
        roll = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@lang="en"]')))
        roll.send_keys(Keys.PAGE_DOWN)
        sleep(2)
except:
    driver.quit()

点击

正常点击

target.click()

被遮挡的元素点击

from selenium.webdriver.common.keys import Keys
target.send_keys(Keys.ENTER)

js点击1

js = 'var q=document.getElementById("map-distance").click()'
driver.execute_script(js)

js点击2

js1 = 'var q=document.getElementById("mapdistance").children[3].setAttribute("class","on")'
js2 = 'var a=document.getElementById("mapdistance").children[0].removeAttribute("class")'
driver.execute_script(js1)
driver.execute_script(js2)

js定位元素

# 获取指定 ID 的元素
document.getElementById()
# 获取包含带有指定类名的所有元素的节点列表
document.getElementsByClassName()    
# 获取指定Name的所有元素的节点列表
document.getElementsByName()    
# 获取带有指定标签名称的所有元素的节点列表
document.getElementsByTagName()

提取标签数据

element = driver.find_element_by_id("...")
# 获取文本
text = element.text
# 获取属性
href = element.get_attribute("href")
# 隐藏的文本内容
判断是否可见
element.is_displayed()
返回值为False的一般隐藏在以下三个属性中:
textContent / innerText / innerHTML
text = element.get_attribute('textContent')

DEMO

from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait

input = wait.until(EC.presence_of_element_located((By.ID, 'input')))
input.send_keys("keywords")
button= wait.until(EC.element_to_be_clickable((By.XPATH, '//div[@class="btn"]')))
 button.click()

参考链接:

https://www.cnblogs.com/nbkhic/p/4885041.html
服务器部署

https://blog.csdn.net/fengmm521/article/details/79661771

注意
如果使用--headless拿不到数据, 是因为Chrome版本太高了, 换成62之前的版本即可
http://www.manongjc.com/article/7635.html
创建chromedriver的软链接到全局路径

ln -s /opt/google/chromedriver /usr/bin/chromedriver

chrome历史版本下载链接

https://www.chromedownloads.net/chrome64linux-stable/
https://www.chromedownloads.net/chrome64linux/

chrome与driver版本
https://www.chromedownloads.net/chrome64linux/
https://downzen.com/en/windows/google-chrome/versions/

driver下载

http://chromedriver.storage.googleapis.com/index.html
https://phantomjs.org/download.html

支持phantomjs的selenium版本

pip3 install selenium==2.48.0

phantomjs参考链接

https://www.168seo.cn/python-2/3385.html


dcap = dict(DesiredCapabilities.PHANTOMJS)
dcap["phantomjs.page.settings.userAgent"] = (
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0")
# 设置user-agent请求头
dcap["phantomjs.page.settings.loadImages"] = False  # 禁止加载图片
driver = webdriver.PhantomJS(desired_capabilities=dcap)
#使用代理
chrome_options.add_argument("--proxy-server=http://202.20.16.82:10152")

设置为开发模式

此步骤很重要，设置chrome为开发者模式，防止被各大网站识别出来

options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-automation'])

截取指定坐标范围的图片

加载图片

image = wait.until(EC.presence_of_element_located((By.XPATH, '//div[@class="qrcode-img"]/img')))

获取图片左上角坐标

location = image.location
print('参考坐标:{}'.format(location))

获取图片参考尺寸

size = image.size
print('参考尺寸:{}'.format(size))

获取整个页面的图片

img_screen = driver.get_screenshot_as_png()
open_img = Image.open(BytesIO(img_screen))

首次截取图片

left = location['x']
top = location['y']
height = size['height']
width = size['width']
right = left + width
bottom = top + height
print(left, top, right, bottom)
target_img = open_img.crop((left, top, right, bottom))
target_img.show()

调整left, top, right, bottom的值
直到得到理想的效果

保存图片

target_img.save('target_img.png')

设定参数
options = webdriver.ChromeOptions()
options.binary_location = chromebrowser_path # 指定浏览器路径
# options.add_argument("--window-size=800,600") # 指定窗口大小打开
options.add_argument("--start-maximized") # 窗口最大化打开
options.add_argument("--window-position=100,100") # 指定窗口位置
prefs = {"profile.managed_default_content_settings.images": 2} # 不加载图片
options.add_experimental_option("prefs", prefs)

if not head:
    options.add_argument('--headless')  # 无头
else:
    options.add_argument(
        '--user-data-dir=' + chromedata_path)  # 使用chrome插件

driver_ = webdriver.Chrome(executable_path=chromedriver_path, options=options)
wait_ = WebDriverWait(driver_, 30)