chromedriver的一些初始化设置
import time
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Build the Chrome options used to start the browser.
chrome_options = Options()
# Run headless (no visible browser window).
chrome_options.add_argument('--headless')
# Do not load images (content-setting value 2 blocks them).
prefs = {"profile.managed_default_content_settings.images": 2}
chrome_options.add_experimental_option("prefs", prefs)
# Pass the options through `options=`; the older `chrome_options=` keyword
# is deprecated.
driver = webdriver.Chrome(
    options=chrome_options,
    executable_path=r'D:\software\python36\chromedriver.exe',
)
# Page-load timeout, in seconds.
driver.set_page_load_timeout(20)
# Timeout for asynchronous JavaScript execution, in seconds.
driver.set_script_timeout(10)
等待
显式等待
from selenium.webdriver.support.wait import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
# Explicit wait: block until the element with id "sendPhoneCode" is
# clickable (giving up after `max_wait` seconds), then click it.
wait = WebDriverWait(driver, max_wait)
send_code_button = wait.until(
    EC.element_to_be_clickable((By.ID, 'sendPhoneCode'))
)
send_code_button.click()
# 其他判断加载的方法
# *****常用: 判断元素是否已加载到DOM, 传入locator
presence_of_element_located
# *****常用: 判断某个元素是否可点击, 传入locator
element_to_be_clickable
# 判断某个元素是否不可见或不存在于DOM, 传入locator
invisibility_of_element_located
# 判断该frame是否可以switch进去,如果是返回True并且switch进去,否则返回False
frame_to_be_available_and_switch_to_it
# 判断某个元素中的text是否 包含 了预期的字符串
text_to_be_present_in_element
# 判断某个元素中的value属性是否包含了预期的字符串
text_to_be_present_in_element_value
# 等待某个元素从DOM树中移除(不再附着于DOM), 传入element, 返回True或False
staleness_of
# 判断某个元素是否被选中了,一般用在下拉列表
element_to_be_selected
# 判断元素是否可见
visibility_of
# 判断标题等于
title_is
# 标题包含
title_contains
# 判断页面上是否存在alert
alert_is_present
# 确认弹出框
driver.switch_to.alert.accept()
隐式等待
driver.implicitly_wait(seconds)
强制等待
time.sleep(seconds)
新标签中打开url, 并切换到该标签
# Open the URL in a new tab. The URL is passed as a script argument instead
# of being formatted into the JavaScript source, so quotes or other special
# characters in it cannot break the script.
driver.execute_script('window.open(arguments[0]);', dl_url)
# Switch to the newest handle (assumed to be the last one in the list).
# switch_to.window() replaces the deprecated switch_to_window().
driver.switch_to.window(driver.window_handles[-1])
窗口最大化
最大化方式打开
# Ask Chrome to start with its window maximized; the options object still
# has to be passed to webdriver.Chrome(...) when the driver is created.
chrome_options = Options()
chrome_options.add_argument('--start-maximized')
已开窗口最大化
driver.maximize_window()
窗口切换
driver.switch_to.window(driver.window_handles[-1])
iframe的处理
# Pause before looking up the iframe (only `time` is imported, so the call
# must be time.sleep rather than a bare sleep).
time.sleep(2)
iframe = driver.find_element_by_xpath('//iframe[@class="iframe"]')
# Enter the iframe so that elements inside it can be located.
driver.switch_to.frame(iframe)
# Return to the top-level document; switch_to.default_content() replaces
# the deprecated switch_to_default_content().
driver.switch_to.default_content()
load = driver.find_element_by_xpath('//a[@id="WkDialogOk"]')
load.click()
# Close the current window.
driver.close()
滚动加载
js滚动1
time.sleep(2)
# Option 1: scroll to a fixed offset of 1000px from the top.
js = "var q=document.documentElement.scrollTop=1000"
# Option 2 (used here, as it is assigned last): scroll to the bottom of the
# page. The script must be plain browser JavaScript; `page.evaluate(...)` is
# not defined inside the page.
js = "window.scrollTo(0, document.body.scrollHeight);"
driver.execute_script(js)
js滚动2
# The element carries two classes, so it has to be located with a CSS
# selector; the class-name locator accepts a single class only.
target = driver.find_element_by_css_selector(".loadmore.disabled")
# WebElement exposes its text through `.text`.
if target.text != "没有更多了":
    # Scroll the element into view.
    js = "arguments[0].scrollIntoView();"
    driver.execute_script(js, target)
    time.sleep(1)
js滚动3
# Scroll down by a random vertical distance between 0 and 5000 pixels.
js = "window.scrollBy(0,{})"
offset = random.uniform(0, 5000)
scroll_script = js.format(offset)
driver.execute_script(scroll_script)
pagedown滚动
from selenium.webdriver.common.keys import Keys

# Press PAGE_DOWN 25 times on the element matching //*[@lang="en"],
# pausing 2 seconds after each key press.
try:
    for _ in range(25):
        roll = wait.until(EC.presence_of_element_located((By.XPATH, '//*[@lang="en"]')))
        roll.send_keys(Keys.PAGE_DOWN)
        time.sleep(2)
except Exception:
    # Shut the browser down on failure, then re-raise so the error is not
    # silently swallowed (the driver is unusable after quit() anyway).
    driver.quit()
    raise
点击
正常点击
target.click()
被遮挡的元素点击
from selenium.webdriver.common.keys import Keys
# Send ENTER to the element instead of clicking it; the WebElement method
# is send_keys().
target.send_keys(Keys.ENTER)
js点击1
# Click the element with id "map-distance" from JavaScript rather than
# through WebElement.click().
js = 'var q=document.getElementById("map-distance").click()'
driver.execute_script(js)
js点击2
# Set the class of the fourth child of #mapdistance to "on" ...
js1 = 'var q=document.getElementById("mapdistance").children[3].setAttribute("class","on")'
# ... and remove the class attribute from its first child.
js2 = 'var a=document.getElementById("mapdistance").children[0].removeAttribute("class")'
driver.execute_script(js1)
driver.execute_script(js2)
js定位元素
# 获取指定 ID 的元素
document.getElementById()
# 获取包含带有指定类名的所有元素的节点列表
document.getElementsByClassName()
# 获取指定Name的所有元素的节点列表
document.getElementsByName()
# 获取带有指定标签名称的所有元素的节点列表
document.getElementsByTagName()
提取标签数据
element = driver.find_element_by_id("...")
# Text of the element.
text = element.text
# Value of an attribute.
href = element.get_attribute("href")
# Hidden text content: first check whether the element is visible.
element.is_displayed()
# When is_displayed() returns False, the text is generally available through
# one of these three attributes: textContent / innerText / innerHTML.
# The attribute name must not contain surrounding whitespace.
text = element.get_attribute('textContent')
DEMO
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.wait import WebDriverWait
# Wait for the element with id "input" to be present, then type into it.
# The variable is not called `input`, which would shadow the builtin.
input_box = wait.until(EC.presence_of_element_located((By.ID, 'input')))
input_box.send_keys("keywords")
# Wait until the button is clickable before clicking it.
button = wait.until(EC.element_to_be_clickable((By.XPATH, '//div[@class="btn"]')))
button.click()
参考链接:
https://www.cnblogs.com/nbkhic/p/4885041.html
服务器部署
https://blog.csdn.net/fengmm521/article/details/79661771
注意
如果使用 --headless 拿不到数据, 是因为版本太高了, 换成62之前的版本即可
http://www.manongjc.com/article/7635.html
创建chromedriver的软连接到全局路径
ln -s /opt/google/chromedriver /usr/bin/chromedriver
chrome历史版本下载链接
https://www.chromedownloads.net/chrome64linux-stable/
https://www.chromedownloads.net/chrome64linux/
chrome与driver版本
https://www.chromedownloads.net/chrome64linux/
https://downzen.com/en/windows/google-chrome/versions/
driver下载
http://chromedriver.storage.googleapis.com/index.html
https://phantomjs.org/download.html
selenium支持phantomjs的版本
pip3 install selenium==2.48.0
phantomjs参考链接
https://www.168seo.cn/python-2/3385.html
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

# Work on a copy of the default PhantomJS capabilities.
dcap = dict(DesiredCapabilities.PHANTOMJS)
# Set the User-Agent request header.
dcap["phantomjs.page.settings.userAgent"] = (
    "Mozilla/5.0 (Windows NT 6.1) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/49.0.2623.221 Safari/537.36 SE 2.X MetaSr 1.0")
# Do not load images.
dcap["phantomjs.page.settings.loadImages"] = False
driver = webdriver.PhantomJS(desired_capabilities=dcap)
#使用代理
chrome_options.add_argument("--proxy-server=http://202.20.16.82:10152")
设置为开发模式
此步骤很重要,设置chrome为开发者模式,防止被各大网站识别出来
# Exclude Chrome's "enable-automation" switch; the intent is to make the
# browser harder for websites to identify as automated.
options = webdriver.ChromeOptions()
options.add_experimental_option('excludeSwitches', ['enable-automation'])
截取指定坐标范围的图片
from io import BytesIO

# NOTE(review): `Image` is presumably PIL.Image (Pillow); it must be
# imported before this snippet runs. TODO confirm.

# Wait for the QR-code <img> element to be present in the DOM.
image = wait.until(EC.presence_of_element_located((By.XPATH, '//div[@class="qrcode-img"]/img')))
# Coordinates of the element's top-left corner.
location = image.location
print('参考坐标:{}'.format(location))
# Reference dimensions of the element. `size` is a property holding a
# dict, so it must not be called.
size = image.size
print('参考尺寸:{}'.format(size))
# Take a screenshot of the whole page and open it as an image.
img_screen = driver.get_screenshot_as_png()
open_img = Image.open(BytesIO(img_screen))
# First crop attempt, using the element's reported box.
left = location['x']
top = location['y']
height = size['height']
width = size['width']
right = left + width
bottom = top + height
print(left, top, right, bottom)
target_img = open_img.crop((left, top, right, bottom))
target_img.show()
# Adjust left, top, right and bottom until the crop looks right,
# then save the image.
target_img.save('target_img.png')
设定参数
# All string literals use plain ASCII quotes and "--" prefixes; typographic
# quotes and en dashes are a syntax error and corrupt the switches.
options = webdriver.ChromeOptions()
options.binary_location = chromebrowser_path  # path of the browser binary
# options.add_argument("--window-size=800,600")  # open with a fixed window size
options.add_argument("--start-maximized")  # open maximized
options.add_argument("--window-position=100,100")  # window position
prefs = {"profile.managed_default_content_settings.images": 2}  # do not load images
options.add_experimental_option("prefs", prefs)
if not head:
    options.add_argument('--headless')  # headless mode
else:
    # Use the given user data directory so that installed Chrome
    # extensions are available.
    options.add_argument(
        '--user-data-dir=' + chromedata_path)
driver_ = webdriver.Chrome(executable_path=chromedriver_path, options=options)
wait_ = WebDriverWait(driver_, 30)