等待元素加载
selenium中等待方法
time.sleep()
- 强制等待:不论元素是否已经加载出来,都必须等满指定的时间。
web.implicitly_wait()
- 设置后全局使用,后面元素默认遵守等待。
- 如果没加载出来,会等待一段时间。
- 如果元素加载出来了,就不用等
WebDriverWait
- 局部使用,单独等一个元素
- 如果出现了,就不用等待
- 如果等待期间元素一直没有出现,超时后会抛出 TimeoutException。
from selenium import webdriver
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
# `webdriver` is a module, not a callable; a concrete browser class must be
# instantiated to obtain a driver.
web = webdriver.Chrome()
# Wait up to 10 seconds, polling every 0.5 seconds.
ele = WebDriverWait(web, 10, 0.5).until(
    # Wait for the element at this XPath to be present; the wait ends as soon
    # as it appears.
    EC.presence_of_element_located((By.XPATH, "/html/body/div[5]/div[2]/div[1]/div/div"))
)
实例1
import base64 # 图片转成字节码串,在网上传输
import json
import time
from selenium.webdriver import ActionChains
from selenium.webdriver.support import expected_conditions as EC
import requests
from lxml import etree
from selenium.webdriver.common.by import By
from selenium.webdriver.support.wait import WebDriverWait
from selenium import webdriver
# TTShiTu (图鉴) captcha-recognition client
class VerifyCode():
    """Thin client for the captcha-recognition endpoint at ``API_URL``.

    Every ``verify_*`` method base64-encodes the image file(s), posts them as
    JSON together with the account credentials and a recognition ``typeid``,
    and returns ``result["data"]["result"]`` when the response reports
    success, or ``result["message"]`` otherwise.

    NOTE(review): both outcomes come back as plain return values, so callers
    must tell a recognition result from an error message themselves.
    """

    API_URL = "http://api.ttshitu.com/predict"

    def __init__(self, username="用户名", password='密码', timeout=30):
        """Store the account credentials and the per-request timeout.

        Args:
            username: Account name sent with every request.
            password: Account password sent with every request.
            timeout: Seconds to wait for the HTTP request before giving up,
                so a stalled API call cannot hang the caller forever.
        """
        self.username = username
        self.password = password
        self.timeout = timeout

    @staticmethod
    def _encode_image(path):
        """Return the file at *path* as a base64 text string."""
        with open(path, 'rb') as f:
            return base64.b64encode(f.read()).decode()

    def _predict(self, typeid, **images):
        """Post *images* for recognition type *typeid* and return the outcome."""
        data = {
            "username": self.username,
            "password": self.password,
            "typeid": typeid,
            **images,
        }
        response = requests.post(self.API_URL, json=data, timeout=self.timeout)
        result = json.loads(response.text)
        if result['success']:
            return result["data"]["result"]
        return result["message"]

    # Type 18: gap recognition (needs two images: the target image and the
    # gap/background image).
    def verify_缺口(self, img, img_back):
        """Recognise a slider gap from a target image and a background image.

        Args:
            img: Path of the target image file.
            img_back: Path of the gap/background image file.

        Returns:
            The recognition result on success, otherwise the API's message.
        """
        return self._predict(
            18,
            image=self._encode_image(img),
            imageback=self._encode_image(img_back),
        )

    # Type 33: single-gap recognition (returns the X coordinate; needs only
    # one image).
    def verify_单缺口(self, img):
        """Recognise a slider gap from a single image.

        Args:
            img: Path of the image file.

        Returns:
            The recognition result on success, otherwise the API's message.
        """
        return self._predict(33, image=self._encode_image(img))

    # Type 27: 1 to 4 click coordinates.
    def verify_点击(self, img):
        """Recognise the click targets of a click-type captcha image.

        Args:
            img: Path of the image file.

        Returns:
            The recognition result on success, otherwise the API's message.
        """
        return self._predict(27, image=self._encode_image(img))
# Session ids of drivers that have already completed login(), so that
# get_page_source() signs in once per browser instead of once per page.
_LOGGED_IN_SESSIONS = set()


def _parse_click_points(result):
    """Turn a ``"x1,y1|x2,y2"`` recognition result into ``(x, y)`` int pairs.

    Raises:
        ValueError: If *result* is not in that format, e.g. because the
            recognition service returned an error message instead.
    """
    points = []
    for point in result.split("|"):
        ps = point.split(",")
        try:
            points.append((int(ps[0]), int(ps[1])))
        except (ValueError, IndexError):
            raise ValueError(
                f"unexpected captcha recognition result: {result!r}"
            ) from None
    return points


# Log in to the site
def login(username="xxxx", password="xxxx", web=None):
    """Fill in the login form, solve the click captcha and submit.

    Args:
        username: Value typed into the account field.
        password: Value typed into the password field.
        web: Driver to log in with. When omitted, a new Chrome instance is
            started, as before.

    Returns:
        The driver that performed the login, so the caller can keep using
        the same browser session.

    Raises:
        ValueError: If the captcha recognition result cannot be parsed as
            click coordinates.
    """
    # 1. Complete the login
    login_url = "https://login.zhipin.com/?ka=header-login"
    if web is None:
        web = webdriver.Chrome()
        time.sleep(3)
    web.get(login_url)
    # find_element(By.XPATH, ...) replaces the find_element_by_xpath helpers,
    # which are no longer available in current Selenium releases.
    web.find_element(By.XPATH, '//*[@id="wrap"]/div[2]/div[1]/div[2]/div[1]/form/div[3]/span[2]/input').send_keys(username)
    web.find_element(By.XPATH, '//*[@id="wrap"]/div[2]/div[1]/div[2]/div[1]/form/div[4]/span/input').send_keys(password)
    web.find_element(By.XPATH, '//*[@id="pwdVerrifyCode"]/div').click()
    ele = WebDriverWait(web, 10, 0.5).until(
        EC.presence_of_element_located((By.XPATH, "/html/body/div[5]/div[2]/div[1]/div/div"))
    )
    # Screenshot the located element and save it under this file name.
    ele.screenshot("图片验证.png")
    # Send the screenshot to the click-captcha recognition method.
    result = VerifyCode().verify_点击("图片验证.png")
    for x, y in _parse_click_points(result):
        # Action chain: move to the offset inside the captcha element, click.
        # NOTE(review): newer Selenium releases measure this offset from the
        # element's centre rather than its top-left corner - confirm against
        # the installed version.
        ActionChains(web).move_to_element_with_offset(ele, x, y).click().perform()
        time.sleep(1)
    web.find_element(By.XPATH, '/html/body/div[5]/div[2]/div[1]/div/div/div[3]/a/div').click()
    time.sleep(2)
    web.find_element(By.XPATH, '//*[@id="wrap"]/div[2]/div[1]/div[2]/div[1]/form/div[6]/button').click()
    time.sleep(3)
    return web


# Fetch the page source
def get_page_source(url):
    """Load *url* in the module-level ``web`` driver and return its HTML.

    Logs in with that same driver the first time it is used, so the login
    session is the one that fetches the page. The HTML is also printed and
    written to ``ab.html``, overwriting the file on every call.

    Args:
        url: Address of the page to load.

    Returns:
        The page source of the loaded page.
    """
    # Log in once per browser session, in the browser that does the fetching.
    if web.session_id not in _LOGGED_IN_SESSIONS:
        login(web=web)
        _LOGGED_IN_SESSIONS.add(web.session_id)
    web.get(url)
    # Explicit wait of up to 20 seconds for the job-name links to be present.
    # The locator must select an element, so it carries no trailing /text().
    WebDriverWait(web, 20).until(
        EC.presence_of_element_located((By.XPATH, "//ul[@class='job-list-box']//span[@class='job-name']/a"))
    )
    # Show the fetched source and save it to ab.html.
    page_source = web.page_source
    print(page_source)
    with open('ab.html', 'w', encoding='utf-8') as f:
        f.write(page_source)
    return page_source
if __name__ == '__main__':
    # Module-level driver read by get_page_source().
    web = webdriver.Chrome()
    try:
        # Scrape result pages 1 through 9.
        for i in range(1, 10):
            url = f"https://www.zhipin.com/c101010100/?query=python&page={i}&ka=page-{i}"
            print(url)
            content = get_page_source(url)
            tree = etree.HTML(content)
            job_names = tree.xpath("//ul[@class='job-list-box']//span[@class='job-name']/a/text()")
            print(job_names)
            print(f"正在抓取第{i}页")
    finally:
        # Always close the browser, even when a page fails, so no Chrome
        # process is left running.
        web.quit()
跳过滑块检测
验证码之滑块检测,跳过浏览器检测设置
# Usable from version 88 onwards.
# NOTE(review): presumably this means Chrome 88 - the original note does not
# say which component the version refers to.
# Imported here because nothing in the visible file brings these names into
# scope.
from selenium.webdriver import Chrome
from selenium.webdriver.chrome.options import Options

option = Options()
# Alternative switch, left disabled in the original notes:
# option.add_experimental_option('excludeSwitches', ['enable-automation'])
option.add_argument('--disable-blink-features=AutomationControlled')
web = Chrome(options=option)