基于pyppeteer 破解极验滑块验证码

Cqw150

已于 2022-11-10 10:54:27 修改

阅读量1.8k

点赞数 2

分类专栏： python pyppeteer 文章标签： python

于 2021-06-28 16:23:52 首次发布

本文链接：https://blog.csdn.net/Cqw150/article/details/118305687

版权

python 同时被 2 个专栏收录

2 篇文章 0 订阅

订阅专栏

pyppeteer

1 篇文章 0 订阅

订阅专栏

本文章主要介绍如何使用pyppeteer破解滑块验证码，主要分为3个步骤：

1.下载滑块验证码的原图和缺口图片。

2.计算缺口偏移量

3.模拟滑块滑动，并处理结果

首先这次的目标网站是国家企业标准网：http://www.qybz.org.cn/standardProduct/toAdvancedResult.do（该网站已经改变，滑块处理逻辑还是通用）

这个网站打开的时候就有一个滑块验证码需要点击，首先是获取原图和缺口图

import asyncio
import base64
import os
import random
import re
import time
from io import BytesIO

import matplotlib.pyplot as plt
import requests
from parsel import Selector
from PIL import Image, ImageChops
from pyppeteer import launch


async def get_pic(page):
    """Dump the two captcha canvases of the current page to PNG files.

    The canvas with class ``geetest_canvas_fullbg`` is written to
    ``./img/fullbg.png`` and the canvas with classes
    ``geetest_canvas_bg geetest_absolute`` to ``./img/fadebg.png``.

    :param page: page object exposing an awaitable ``evaluate(js)``.
    """
    full_canvas_script = """
                () => { return document.getElementsByClassName("geetest_canvas_fullbg")[0].toDataURL("image/png") }
                """
    gap_canvas_script = """
                () => {  return document.getElementsByClassName("geetest_canvas_bg geetest_absolute")[0].toDataURL("image/png")}
                """
    # Each canvas is read as a data URL and handed straight to the decoder.
    await get_decode_image(
        filename="./img/fullbg.png",
        data=await page.evaluate(full_canvas_script),
    )
    # Short pause between the two canvas reads.
    await asyncio.sleep(0.1)
    await get_decode_image(
        filename="./img/fadebg.png",
        data=await page.evaluate(gap_canvas_script),
    )

async def get_decode_image(filename, data):
    """Decode a base64 data URL and write the raw bytes to *filename*.

    :param filename: destination path; missing parent directories are created.
    :param data: data URL of the form ``<header>,<base64 payload>``.
    :raises ValueError: if *data* contains no comma or the payload is not
        valid base64.
    """
    # Split on the first comma only: everything after it is the payload.
    _, payload = data.split(",", 1)
    raw = base64.b64decode(payload)
    # open() does not create directories, so make sure the target one exists.
    parent = os.path.dirname(filename)
    if parent:
        os.makedirs(parent, exist_ok=True)
    with open(filename, "wb") as f:
        f.write(raw)

图片样例：

接下来就是计算缺口的距离：

def _load_rgb(source):
    """Return *source* as an RGB image; accepts a PIL image, a path or a file object."""
    if isinstance(source, Image.Image):
        return source.convert("RGB")
    # convert() returns an independent copy, so the file can be closed afterwards.
    with Image.open(source) as opened:
        return opened.convert("RGB")


async def compute_gap(img1, img2, left=43, threshold=50, min_pixels=5):
    """Find the x coordinate where two captcha images start to differ.

    The images are diffed, converted to greyscale and binarised; columns are
    then scanned left to right and the first column holding more than
    *min_pixels* differing pixels is returned.

    :param img1: first image (path, file object or PIL image).
    :param img2: second image (path, file object or PIL image).
    :param left: first column to scan; columns before it are ignored.
    :param threshold: greyscale difference below which a pixel counts as equal.
    :param min_pixels: a column matches once it has more than this many
        differing pixels, so small bumps on the gap outline are not mistaken
        for its edge.
    :return: the matching column index, or ``None`` if no column qualifies.
    """
    # The matplotlib figure the original opened and closed here was debugging
    # residue with no effect on the result, so it is gone.
    diff = ImageChops.difference(_load_rgb(img1), _load_rgb(img2))

    # Binarise: differences below the threshold become 0, everything else 1.
    table = [0 if level < threshold else 1 for level in range(256)]
    diff = diff.convert("L").point(table, '1')

    # Fetch the pixel accessor once instead of once per pixel.
    pixels = diff.load()
    width, height = diff.size
    for w in range(left, width):
        hits = 0
        for h in range(height):
            if pixels[w, h] == 1:
                hits += 1
                if hits > min_pixels:
                    return w
    return None
注：这里得到的缺口横坐标还不是最终的滑动距离，还需要再减去 9，因为滑块的初始位置并不在图片最左边，中间还有一小段缝隙。（这个 9 是本人测试出来的，仅供参考）

最后是模拟滑块滑动并处理结果：

async def try_validation(page, total_length, total_count=0, x=0):
    """Drag the slider in five segments and retry until accepted or out of attempts.

    The total distance ``x + total_length`` is split into five equal parts;
    each part gets an extra offset (+70, +30, -20, -30, -50, summing to zero)
    so the pointer overshoots and then settles on the target.

    :param page: pyppeteer page showing the captcha.
    :param total_length: horizontal distance to drag, in pixels.
    :param total_count: number of failed attempts already made.
    :param x: extra horizontal offset added to *total_length*. The published
        snippet read an undefined name ``x``; it is exposed here with a
        default of 0 -- NOTE(review): confirm the intended value.
    :return: the number of failed attempts; 5 means the caller should give up
        (also returned when any exception occurs).
    """
    # (extra offset, (min, max) mouse steps, (min, max) pause in ms) per segment.
    segments = (
        (70, (20, 30), (150, 300)),
        (30, (15, 20), (150, 300)),
        (-20, (10, 15), (150, 300)),
        (-30, (5, 10), (200, 300)),
        (-50, (1, 3), (1000, 1500)),
    )
    try:
        is_monster = False
        while True:
            if is_monster:
                # The captcha was refreshed after a "monster ate the puzzle"
                # failure: download the new images and recompute the gap.
                # This used to recurse with a reset counter and a discarded
                # result; looping keeps the attempt budget and the outcome.
                print('重新获取图片')
                await page.waitFor(500)
                await get_pic(page)
                await page.waitFor(500)
                total_length = await compute_gap(img1="./img/fullbg.png", img2="./img/fadebg.png")
                total_length = total_length - 9
                is_monster = False

            base = (x + total_length) / 5
            elem = await page.xpath('//div[@class="geetest_slider_button"]')
            await page.waitFor(1000)
            await elem[0].hover()
            await page.waitFor(1000)
            await page.mouse.down()
            await page.waitFor(1000)
            mouse = page.mouse
            for extra, steps, pause in segments:
                # Every move is relative to the pointer position left by the
                # previous one (pyppeteer tracks it in mouse._x / mouse._y).
                await mouse.move(mouse._x + base + extra, mouse._y,
                                 {'steps': random.randint(*steps)})
                await page.waitFor(random.randint(*pause))
            await page.mouse.up()
            await page.waitFor(500)

            # Read the result banner from the page to see how the drag went.
            res = await page.content()
            response = Selector(res)
            info = response.xpath('//div[@class="geetest_result_title"]/text()').extract_first()
            print('滑动结果-info：{0}'.format(info))
            if info is not None and '速度超过' in info:
                # This banner text is treated as success.
                break
            if info is not None and '怪物吃了拼图' in info:
                await page.waitFor(1500)  # give the error panel time to load
                res = await page.content()
                if '请点击此处重试' in res:
                    try:
                        submit = await page.xpath("//div[@class='geetest_panel_error_content']")
                        await submit[0].click()
                        await page.waitFor(random.randint(2000, 3000))
                    except Exception as e:
                        # The retry panel was probably not ready; wait and click again.
                        print(e)
                        print('怪兽刷新点击的等待时间过短')
                        await page.waitFor(1500)
                        submit = await page.xpath("//div[@class='geetest_panel_error_content']")
                        await submit[0].click()
                        await page.waitFor(random.randint(2000, 3000))
                    finally:
                        is_monster = True
            total_count = total_count + 1
            if total_count >= 5:  # after five failures the caller restarts the page
                print('滑动失败 重新启动程序')
                break
        return total_count
    except Exception as e:
        # Top-level boundary: any failure is reported as "out of attempts".
        print(e)
        total_count = 5
        return total_count

总结：因为加了失败重试的处理逻辑，所以滑动成功率极高；只有在极少数情况下（网站崩溃，或网站弹出点选确认框）才会失败。

新增：针对缺口图和残图都是由乱序的小图片通过 style 拼接而成的情况，补充了图片还原的处理方法，并优化了滑块滑动的方法：

def merge_image(image_file, location_list):
    """Rebuild a 260x116 image from vertical strips of a shuffled source image.

    Every entry of *location_list* selects one 10-pixel-wide strip at
    ``abs(entry['x'])``. Entries with ``y == -58`` take rows 58-116 of the
    source and fill the top half of the result; entries with ``y == 0`` take
    rows 0-58 and fill the bottom half. Strips are laid out left to right in
    list order. The opened source image is also saved as ``pic.jpg``.

    :param image_file: path or file object of the shuffled image.
    :param location_list: list of dicts with integer ``'x'`` and ``'y'`` keys.
    :return: the reassembled RGB image.
    """
    source = Image.open(image_file)
    source.save('pic.jpg')
    canvas = Image.new('RGB', (260, 116))

    def cut_strip(entry, top, bottom):
        # Strips are 10 px wide; the x offset arrives as a negative CSS value.
        start = abs(entry['x'])
        return source.crop((start, top, start + 10, bottom))

    top_row = [cut_strip(entry, 58, 116) for entry in location_list if entry['y'] == -58]
    bottom_row = [cut_strip(entry, 0, 58) for entry in location_list if entry['y'] == 0]

    for row_y, strips in ((0, top_row), (58, bottom_row)):
        cursor = 0
        for strip in strips:
            canvas.paste(strip, (cursor, row_y))
            cursor += strip.size[0]
    return canvas

async def get_image(page, div_path):
    """Download the shuffled captcha image and return it reassembled.

    The ``style`` attribute of every element matched by *div_path* is parsed
    for the background image URL and the background position; the positions
    drive ``merge_image``.

    :param page: pyppeteer page showing the captcha.
    :param div_path: XPath selecting the tile elements.
    :return: the reassembled image, usable as input for gap detection.
    :raises ValueError: if no tile matches *div_path* or a tile's style does
        not have the expected ``background-image`` / ``background-position``
        form.
    """
    await page.waitFor(2000)
    html = await page.content()
    tiles = Selector(html).xpath(div_path)

    # Raw string: the escaped parentheses are regex escapes, not string escapes.
    style_pattern = re.compile(
        r'background-image: url\("(.*?)"\); background-position: (.*?)px (.*?)px;'
    )
    location_list = []
    image_url = None
    for tile in tiles:
        style = tile.xpath('@style').get()
        match = style_pattern.search(style or '')
        if match is None:
            raise ValueError(f'unexpected captcha tile style: {style!r}')
        image_url, pos_x, pos_y = match.groups()
        location_list.append({'x': int(pos_x), 'y': int(pos_y)})
    if image_url is None:
        raise ValueError(f'no captcha tiles matched xpath {div_path!r}')

    print(f'================={location_list}=================')
    image_url = image_url.replace('webp', 'jpg')
    # NOTE: requests is blocking; the timeout keeps a stalled download from
    # hanging the event loop forever.
    image_result = requests.get(image_url, timeout=10).content

    image_file = BytesIO(image_result)  # still the shuffled image at this point
    return merge_image(image_file, location_list)

最后分享几个我学习pyppeteer的博客：

Python爬虫之pyppeteer的使用：Python爬虫之pyppeteer的使用（爬虫、获取cookie、截屏插件、防爬绕过）_墨痕诉清风的博客-CSDN博客_pyppeteer添加cookie

基于pyppeteer破解极验滑块验证码：基于pyppeteer模拟浏览器方式破解极验滑块验证码_Mr.Lee jack的博客-CSDN博客_pyppeteer 滑动验证码

Cqw150

关注

2
点赞
踩
16

收藏

觉得还不错? 一键收藏
0
评论
基于pyppeteer 破解极验滑块验证码

本文章主要介绍如何使用pyppeteer破解滑块验证码，主要分为3个步骤：1.下载滑块验证码的原图和缺口图片。2.计算缺口偏移量3.模拟滑块滑动，并处理结果首先这次的目标网站是国家企业标准网：http://www.qybz.org.cn/standardProduct/toAdvancedResult.do这个网站打开的时候就有一个滑块验证码需要点击，首先是获取原图和缺口图async def get_pic(page): '''获取图片 ...
复制链接

扫一扫