本文章主要介绍 如何使用pyppeteer破解滑块验证码,主要分为3个步骤:
1.下载滑块验证码的原图和缺口图片。
2.计算缺口偏移量
3.模拟滑块滑动,并处理结果
首先这次的目标网站是国家企业标准网:http://www.qybz.org.cn/standardProduct/toAdvancedResult.do(该网站已经改变,滑块处理逻辑还是通用)
这个网站打开的时候就有一个滑块验证码需要点击。第一步是获取原图和缺口图:
import asyncio
import base64
import os
import random
import re
import time
from io import BytesIO

import matplotlib.pyplot as plt
import requests
from parsel import Selector
from PIL import Image, ImageChops
from pyppeteer import launch
async def get_pic(page):
    """Dump the two captcha canvases to PNG files under ``./img/``.

    The complete background canvas is written to ``./img/fullbg.png`` and
    the canvas showing the gap to ``./img/fadebg.png``. Each canvas is read
    in the browser via ``toDataURL`` and decoded by ``get_decode_image``.

    :param page: page object exposing an awaitable ``evaluate`` method.
    """
    full_canvas_js = """
() => { return document.getElementsByClassName("geetest_canvas_fullbg")[0].toDataURL("image/png") }
"""
    gap_canvas_js = """
() => { return document.getElementsByClassName("geetest_canvas_bg geetest_absolute")[0].toDataURL("image/png")}
"""
    captures = (
        ("./img/fullbg.png", full_canvas_js),
        ("./img/fadebg.png", gap_canvas_js),
    )
    for index, (target_path, script) in enumerate(captures):
        if index:
            # Short pause between the two canvas reads.
            await asyncio.sleep(0.1)
        data_url = await page.evaluate(script)
        await get_decode_image(filename=target_path, data=data_url)
async def get_decode_image(filename, data):
    """Decode a base64 data URL and write the raw bytes to ``filename``.

    :param filename: destination path; missing parent directories are created.
    :param data: string of the form ``<header>,<base64 payload>`` (for
        example the result of ``canvas.toDataURL``).
    :raises ValueError: if ``data`` contains no comma separating the header
        from the payload.
    """
    _header, separator, payload = data.partition(",")
    if not separator:
        raise ValueError("data URL has no ',' between header and payload")
    raw_bytes = base64.b64decode(payload)
    # The article's callers write into ./img/, which may not exist yet.
    parent_dir = os.path.dirname(filename)
    if parent_dir:
        os.makedirs(parent_dir, exist_ok=True)
    with open(filename, "wb") as f:
        f.write(raw_bytes)
图片样例:
接下来就是计算缺口的距离:
async def compute_gap(img1, img2):
    """Return the x coordinate of the first column where the two images differ.

    Both images are converted to RGB, their per-pixel difference is reduced
    to grayscale and thresholded (levels below 50 become 0), and columns are
    scanned left to right starting at x = 43. The first column with more
    than 5 differing pixels is returned.

    :param img1: path or file object of the complete image.
    :param img2: path or file object of the image with the gap.
    :return: the column index, or ``None`` if no column qualifies.
    """
    with Image.open(img1) as full_img, Image.open(img2) as gap_img:
        diff = ImageChops.difference(full_img.convert("RGB"), gap_img.convert("RGB"))

    # Binarize: small differences (noise) map to 0, everything else to 1.
    table = [0 if level < 50 else 1 for level in range(256)]
    diff = diff.convert("L").point(table, '1')
    # diff.getbbox() would also give the differing region as (left, upper, right, lower).

    # Fetch the pixel accessor once instead of once per pixel.
    pixels = diff.load()
    width, height = diff.size
    # Scanning starts at column 43 -- presumably to skip the slider piece on
    # the left edge; TODO confirm against the captcha layout.
    left = 43
    for w in range(left, width):
        # Any non-zero value counts as "different", so the check does not
        # depend on whether set pixels are reported as 1 or 255.
        differing = sum(1 for h in range(height) if pixels[w, h] != 0)
        # Require more than 5 differing pixels in the column so that a small
        # bump on the gap outline does not trigger a premature match.
        if differing > 5:
            return w
    return None
注:这里计算出的缺口横坐标还不是最终的滑动距离,需要再减去 9,因为滑块的初始位置并不在图片最左边,中间还有一点缝隙。(这个 9 是本人测试得出的经验值,仅供参考)
最后是模拟滑块滑动 并处理结果:
async def try_validation(page, total_length, total_count=0, x=0):
    """Drag the slider and retry on failure, up to 5 failed attempts.

    Each attempt drags the slider by ``x + total_length`` pixels and reads
    the result title from the page. If the page then shows the retry prompt,
    it is clicked, the images are downloaded again and the distance is
    recomputed before the next attempt.

    NOTE(review): the original body referenced an undefined name ``x``;
    it is exposed here as an optional parameter defaulting to 0 -- confirm
    that no module-level ``x`` was intended.

    :param page: pyppeteer page showing the captcha.
    :param total_length: horizontal distance to drag, in pixels.
    :param total_count: number of failed attempts so far.
    :param x: extra horizontal offset added to ``total_length``.
    :return: the failed-attempt count; a value of 5 or more means the
        caller should give up (any exception is printed and reported as 5).
    """
    try:
        needs_refresh = False
        while True:
            if needs_refresh:
                # The captcha image was replaced, so the old distance is stale.
                print('重新获取图片')
                await page.waitFor(500)
                await get_pic(page)
                await page.waitFor(500)
                total_length = await compute_gap(img1="./img/fullbg.png", img2="./img/fadebg.png")
                # Same -9 correction the article applies to compute_gap's result.
                total_length = total_length - 9
                needs_refresh = False

            await _drag_slider(page, x + total_length)

            # Read the result banner from the rendered page.
            html = await page.content()
            info = Selector(html).xpath('//div[@class="geetest_result_title"]/text()').extract_first()
            print('滑动结果-info:{0}'.format(info))
            if info is not None and '速度超过' in info:
                # Treated as success.
                break
            if info is not None and '怪物吃了拼图' in info:
                await page.waitFor(1500)  # wait for the error panel to load
                html = await page.content()
                if '请点击此处重试' in html:
                    await _click_retry_panel(page)
                    needs_refresh = True

            total_count = total_count + 1
            if total_count >= 5:
                print('滑动失败 重新启动程序')
                break
        return total_count
    except Exception as e:
        # Best-effort: any failure is reported to the caller as "give up".
        print(e)
        total_count = 5
        return total_count


async def _drag_slider(page, distance):
    """Press the slider button and drag it ``distance`` px in five segments."""
    base = distance / 5
    # (segment length, mouse-step range, pause range in ms). The offsets
    # +70/+30/-20/-30/-50 sum to zero, so the segments add up to ``distance``.
    segments = (
        (base + 70, (20, 30), (150, 300)),
        (base + 30, (15, 20), (150, 300)),
        (base - 20, (10, 15), (150, 300)),
        (base - 30, (5, 10), (200, 300)),
        (base - 50, (1, 3), (1000, 1500)),
    )
    button = await page.xpath('//div[@class="geetest_slider_button"]')
    await page.waitFor(1000)
    await button[0].hover()
    await page.waitFor(1000)
    mouse = page.mouse
    await mouse.down()
    await page.waitFor(1000)
    for length, step_range, pause_range in segments:
        # Each move is relative to the pointer position after the previous one.
        await mouse.move(mouse._x + length, mouse._y, {'steps': random.randint(*step_range)})
        await page.waitFor(random.randint(*pause_range))
    await mouse.up()
    await page.waitFor(500)


async def _click_retry_panel(page):
    """Click the retry panel, waiting and retrying once if the first click fails."""
    panel_xpath = "//div[@class='geetest_panel_error_content']"
    try:
        panel = await page.xpath(panel_xpath)
        await panel[0].click()
        await page.waitFor(random.randint(2000, 3000))
    except Exception as e:
        print(e)
        print('怪兽刷新点击的等待时间过短')
        await page.waitFor(1500)  # give the panel more time to load
        panel = await page.xpath(panel_xpath)
        await panel[0].click()
        await page.waitFor(random.randint(2000, 3000))
总结:因为加了失败处理逻辑,所以滑动的成功率极高;失败的情况只剩下极少数的网站崩溃,以及网站弹出确认点选框。
新增:当缺口图和原图都是由一张乱序的图片通过 style 拼接而成时的处理方法,并优化了滑块滑动的方法:
def merge_image(image_file, location_list):
    """Reassemble a shuffled captcha image into a 260x116 RGB image.

    The source image is also saved to ``pic.jpg`` in the current directory.
    Every entry of ``location_list`` selects one 10-pixel-wide tile at
    ``abs(x)``: entries with ``y == -58`` are cropped from rows 58-116 and
    placed on the top row of the result, entries with ``y == 0`` are cropped
    from rows 0-58 and placed on the bottom row. Tiles are laid out left to
    right in list order.

    :param image_file: path or file object of the shuffled image.
    :param location_list: list of dicts with integer ``'x'`` and ``'y'`` keys.
    :return: the assembled image object.
    """
    source = Image.open(image_file)
    source.save('pic.jpg')
    canvas = Image.new('RGB', (260, 116))

    top_tiles = []
    bottom_tiles = []
    for entry in location_list:
        tile_left = abs(entry['x'])
        tile_right = tile_left + 10
        if entry['y'] == -58:  # goes on the top row
            top_tiles.append(source.crop((tile_left, 58, tile_right, 116)))
        if entry['y'] == 0:  # goes on the bottom row
            bottom_tiles.append(source.crop((tile_left, 0, tile_right, 58)))

    # Paste each row left to right, advancing by the tile width.
    for row_top, tiles in ((0, top_tiles), (58, bottom_tiles)):
        cursor = 0
        for tile in tiles:
            canvas.paste(tile, (cursor, row_top))
            cursor += tile.size[0]
    return canvas
async def get_image(page, div_path):
    """Download the shuffled captcha image and return it reassembled.

    The tile elements selected by ``div_path`` carry an inline style with
    the image URL and a background position; the positions are collected
    and handed to ``merge_image`` together with the downloaded image.
    The image returned can be fed to ``compute_gap`` like the canvas dumps.

    :param page: pyppeteer page showing the captcha.
    :param div_path: XPath selecting the tile elements.
    :return: the image object produced by ``merge_image``.
    :raises ValueError: if no selected element has a matching style.
    """
    await page.waitFor(2000)
    html = await page.content()
    tiles = Selector(html).xpath(div_path)

    style_pattern = re.compile(
        r'background-image: url\("(.*?)"\); background-position: (.*?)px (.*?)px;'
    )
    location_list = []
    image_url = None
    for tile in tiles:
        style = tile.xpath('@style').get()
        found = style_pattern.search(style or '')
        if found is None:
            # Element without the expected inline style: not a tile.
            continue
        url, pos_x, pos_y = found.groups()
        location_list.append({'x': int(pos_x), 'y': int(pos_y)})
        image_url = url
    if image_url is None:
        raise ValueError(f'no captcha tile style found for xpath: {div_path}')

    print(f'================={location_list}=================')
    image_url = image_url.replace('webp', 'jpg')
    # NOTE: this is a blocking HTTP call inside a coroutine; the timeout
    # keeps a stalled download from hanging the event loop indefinitely.
    image_result = requests.get(image_url, timeout=10).content
    image_file = BytesIO(image_result)  # the shuffled source image
    return merge_image(image_file, location_list)
最后分享几个我学习pyppeteer的博客:
Python爬虫之pyppeteer的使用:Python爬虫之pyppeteer的使用(爬虫、获取cookie、截屏插件、防爬绕过)_墨痕诉清风的博客-CSDN博客_pyppeteer添加cookie
基于pyppeteer破解极验滑块验证码:基于pyppeteer模拟浏览器方式破解极验滑块验证码_Mr.Lee jack的博客-CSDN博客_pyppeteer 滑动验证码