Python 爬虫 Selenium 中滑动验证码

最新推荐文章于 2025-02-28 14:31:36 发布

YKenan

最新推荐文章于 2025-02-28 14:31:36 发布

阅读量3k

点赞数 4

分类专栏： # 爬虫 文章标签： selenium

本文链接：https://blog.csdn.net/YKenan/article/details/112058995

版权

爬虫专栏收录该内容

12 篇文章

订阅专栏

本文介绍了如何使用Vue实现图片滑动验证码，并利用Selenium进行识别和模拟滑动。首先，通过Vue插件vue-puzzle-vcode创建滑动验证码，然后用Selenium点击进入验证界面，获取验证码图片，通过图像处理匹配缺口位置，最后模拟滑动轨迹完成验证。代码示例详细展示了整个过程。

摘要生成于 C知道，由 DeepSeek-R1 满血版支持，前往体验 >

Python 爬虫 Selenium 中滑动验证码

免责声明：自本文章发布起, 本文章仅供参考，不得转载，不得复制等操作。浏览本文章的当事人如涉及到任何违反国家法律法规造成的一切后果由浏览本文章的当事人自行承担与本文章博客主无关。以及由于浏览本文章的当事人转载，复制等操作涉及到任何违反国家法律法规引起的纠纷和造成的一切后果由浏览本文章的当事人自行承担与本文章博客主无关。

1. Vue 实现图片滑动验证码

1.1 安装插件

使用 vue-puzzle-vcode 插件:
官方网址: https://gitee.com/beeworkshop/vue-puzzle-vcode/tree/master/

npm install vue-puzzle-vcode --save

1.2 实现例子

更多参数设置请参考官网。

<template>
  <!-- Root container: hosts the puzzle-captcha modal and the login button. -->
  <div id="app">
    <!-- Captcha modal: background images are bound to `imgs`, visibility to `isShow`;
         its `success` and `close` events are handled by the methods of the same name. -->
    <Vcode
        :imgs="imgs"
        :show="isShow"
        @success="success"
        @close="close"
    />
    <!-- Clicking the button calls `submit`. -->
    <button @click="submit">登录</button>
  </div>
</template>

<script>
import Vcode from "vue-puzzle-vcode";

import img1 from "./assets/1.webp";
import img2 from "./assets/2.jpg";
import img3 from "./assets/3.jpg";
import img4 from "./assets/4.jpg";

export default {
  components: { Vcode },
  data: () => ({
    // Whether the captcha modal is currently shown.
    isShow: false,
    // Background pictures offered to the captcha modal.
    imgs: [img1, img2, img3, img4],
  }),
  methods: {
    // Login button handler: bring up the captcha modal.
    submit() {
      this.isShow = true;
    },
    // The user solved the captcha: log the payload, then hide the modal
    // (the modal has to be hidden manually after a successful verification).
    success(msg) {
      console.log(msg);
      this.isShow = false;
    },
    // The user clicked the overlay: hide the modal.
    close() {
      this.isShow = false;
    },
  },
};
</script>

<style>
/* Root container: a 100px-high flex box that centers its children on both axes. */
#app {
  display: flex;
  justify-content: center;
  align-items: center;
  height: 100px;
}

/* Applies to every <button> element; this style block carries no `scoped` attribute. */
button {
  font-size: 25px;
  padding: 5px 30px;
  border-radius: 0;
  background-color: #48a9ff;
  /* NOTE(review): the shorthand only names a color, no border style or width is given. */
  border: aliceblue;
  cursor: pointer;
}
</style>

效果

在这里插入图片描述

2. 用 Selenium 识别

2.1 识别思路

识别滑动验证码的步骤:

点击进入滑动界面
得到完整的图片
得到缺口的图片
匹配缺口照片在完整照片的位置
机器模拟人工滑动轨迹

2.2 点击进入滑动界面

#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import time
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC


# Page object for the demo page that hosts the slider captcha.
class Sliding(object):
    """Open the local demo page and click through to the slider captcha."""

    def __init__(self):
        """Start a Firefox session and prepare a 10-second explicit wait."""
        # Address of the page under test.
        self.url = 'http://localhost:8080/'
        # NOTE(review): recent Selenium 4 releases no longer accept
        # `executable_path`; there the driver path is passed through a Service
        # object. Confirm against the installed Selenium version.
        self.driver = webdriver.Firefox(executable_path=r"D:\install\python\python\Scripts\geckodriver.exe")
        self.wait = WebDriverWait(self.driver, 10)

    def enter_page(self):
        """Load the page, wait for its button to exist and click it."""
        # Use the instance's own URL; relying on a module-level variable named
        # `sliding` breaks as soon as the object is bound to another name.
        self.driver.get(self.url)
        # find_element(By.TAG_NAME, ...) is available in both Selenium 3 and 4,
        # unlike the find_element_by_* helpers.
        button = self.wait.until(lambda d: d.find_element(By.TAG_NAME, "button"))
        button.click()


if __name__ == '__main__':
    # Create the page object (this starts the browser).
    sliding = Sliding()
    # Open the page and click through to the slider captcha.
    sliding.enter_page()

    # Closing the browser is left disabled here.
    # driver.close()

在这里插入图片描述

2.2 得到验证码图片

通过 selenium 执行 js 获取前端 canvas 照片 base64, 再将 base64 转化为 Image

# Convert a base64 string into an image.
def base64_to_image(base64_str, image_path=None):
    """Decode base64 image data into a PIL image.

    Args:
        base64_str: Base64 text, optionally prefixed with a
            ``data:image/...;base64,`` header, which is stripped first.
        image_path: When truthy, the decoded image is also saved to this path.

    Returns:
        The opened PIL image.
    """
    payload = re.sub('^data:image/.+;base64,', '', base64_str)
    picture = Image.open(BytesIO(base64.b64decode(payload)))
    if not image_path:
        return picture
    picture.save(image_path)
    return picture

# Grab both captcha canvases from the page and store them as PNG files.
def get_images(self):
    """Export the full picture and the gap picture from the page's canvases.

    The second and third ``<canvas>`` elements (indexes 1 and 2) are read via
    ``toDataURL`` and written to ``./data``.

    Returns:
        A ``(full_path, gap_path)`` tuple with the paths of the saved files.
    """
    import os  # local import: this listing has no import block of its own

    # Base64 data of the full picture.
    full_js = "return document.getElementsByTagName('canvas')[1].toDataURL('image/png')"
    full_image = self.driver.execute_script(full_js)
    # Base64 data of the gap picture.
    gap_js = "return document.getElementsByTagName('canvas')[2].toDataURL('image/png')"
    gap_image = self.driver.execute_script(gap_js)
    # Target files.
    full_path = "./data/full_image.png"
    gap_path = "./data/gap_image.png"
    # Saving fails if the target directory is missing, so create it first.
    for path in (full_path, gap_path):
        os.makedirs(os.path.dirname(path), exist_ok=True)
    # Decode and save.
    base64_to_image(full_image, full_path)
    base64_to_image(gap_image, gap_path)
    return full_path, gap_path

2.3 匹配缺口照片在完整照片的位置

这里用 cv2.matchTemplate() 方法匹配, 精准度感觉一般般

    # Locate the gap picture inside the full captcha picture.
    def match_gaps(self, full, pag):
        """Return the x offset at which the gap picture best matches the full picture.

        Three normalized template-matching methods are tried; the result of the
        one whose score is closest to a perfect match is used.

        Args:
            full: Path of the full captcha picture.
            pag: Path of the gap picture used as the template.

        Returns:
            The x coordinate (in pixels) of the best match location.

        Raises:
            FileNotFoundError: If either picture cannot be read.
        """
        # Both pictures are loaded with imread's default flags (color).
        img_full = cv2.imread(full)
        template = cv2.imread(pag)
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than inside matchTemplate.
        for path, image in ((full, img_full), (pag, template)):
            if image is None:
                raise FileNotFoundError("Cannot read image file: {}".format(path))
        methods = [cv2.TM_SQDIFF_NORMED, cv2.TM_CCORR_NORMED, cv2.TM_CCOEFF_NORMED]
        # Per method: x offset of its best match and how far its score is
        # from a perfect match (smaller is better).
        offsets = []
        scores = []
        for method in methods:
            res = cv2.matchTemplate(img_full, template, method)
            min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
            if method == cv2.TM_SQDIFF_NORMED:
                # Squared difference: the best match is the minimum, ideally 0.
                scores.append(min_val)
                offsets.append(min_loc[0])
            else:
                # Correlation methods: the best match is the maximum, ideally 1.
                scores.append(1.0 - max_val)
                offsets.append(max_loc[0])
        # First index holding the smallest score.
        index = min(range(len(scores)), key=scores.__getitem__)
        print("选用第 {:d} 个方法, 差为: {:f}, 距离为: {:d}".format(index + 1, scores[index], offsets[index]))
        return offsets[index]

2.4 机器模拟人工滑动轨迹

这里模拟人类“先快后慢、最后再抖动一下”的滑动动作。
若识别过程出现错误, 则重新识别, 通过递归重复上述流程。

完整代码

#!/usr/bin/env python
# -*- coding: UTF-8 -*-

import base64
import os
import random
import re
import time
from io import BytesIO

import cv2
import numpy as np
from PIL import Image
from selenium import webdriver
from selenium.webdriver import ActionChains
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait


# Convert a base64 string into an image.
def base64_to_image(base64_str, image_path=None):
    """Decode base64 image data into a PIL image.

    Args:
        base64_str: Base64 text, optionally prefixed with a
            ``data:image/...;base64,`` header, which is stripped first.
        image_path: When truthy, the decoded image is also saved to this path.

    Returns:
        The opened PIL image.
    """
    payload = re.sub('^data:image/.+;base64,', '', base64_str)
    picture = Image.open(BytesIO(base64.b64decode(payload)))
    if not image_path:
        return picture
    picture.save(image_path)
    return picture


# Page object that opens the demo page and solves its slider captcha.
class Sliding(object):
    """Drive a browser through the slider captcha of the local demo page.

    Workflow: open the page, export the captcha pictures, locate the gap by
    template matching, drag the slider along a generated track and repeat
    until the page no longer reports the captcha as shown.
    """

    def __init__(self):
        """Start a Firefox session and prepare a 10-second explicit wait."""
        # Address of the page under test.
        self.url = 'http://localhost:8080/'
        # NOTE(review): recent Selenium 4 releases no longer accept
        # `executable_path`; there the driver path is passed through a Service
        # object. Confirm against the installed Selenium version.
        self.driver = webdriver.Firefox(executable_path=r"D:\install\python\python\Scripts\geckodriver.exe")
        self.wait = WebDriverWait(self.driver, 10)

    def get_track(self, distance):
        """Build a list of per-step x offsets whose sum is exactly ``distance``.

        The ideal position accelerates during the first three quarters of the
        way and decelerates afterwards. Each emitted step is the difference
        between the rounded ideal position and the pixels already emitted, so
        rounding errors do not accumulate, and the last step is clamped so the
        slider never overshoots the target.

        Args:
            distance: Number of pixels the slider has to travel.

        Returns:
            A list of integer offsets; empty when ``distance`` is not positive.
        """
        target = int(round(distance))
        track = []
        if target <= 0:
            return track
        # Switch from accelerating to decelerating at 3/4 of the way.
        mid = target * 3 / 4
        # Duration of one step: 0.5 or 0.6, chosen once per track.
        t = random.randint(5, 6) / 10
        v = 0
        position = 0.0  # ideal, fractional position
        moved = 0       # whole pixels emitted so far
        while moved < target:
            a = 6 if position < mid else -7
            v0 = v
            v = v0 + a * t
            position += v0 * t + 3 / 4 * a * t * t
            # Never emit more than what is left to the target.
            step = min(round(position) - moved, target - moved)
            track.append(step)
            moved += step
        return track

    def judge_show(self):
        """Wait three seconds, then report whether elements with class ``show_`` exist.

        Returns:
            True when at least one such element is present; ``loop`` treats
            that as "captcha still shown, try again".
        """
        time.sleep(3)
        show_ = "return document.getElementsByClassName('show_')"
        show_is = self.driver.execute_script(show_)
        return len(show_is) > 0

    def enter_page(self):
        """Load the page, wait for its button to exist and click it."""
        # Use the instance's own URL; relying on a module-level variable named
        # `sliding` breaks as soon as the object is bound to another name.
        self.driver.get(self.url)
        # find_element(By.TAG_NAME, ...) is available in both Selenium 3 and 4,
        # unlike the find_element_by_* helpers.
        button = self.wait.until(lambda d: d.find_element(By.TAG_NAME, "button"))
        button.click()

    def get_images(self):
        """Export the full picture and the gap picture from the page's canvases.

        The second and third ``<canvas>`` elements (indexes 1 and 2) are read
        via ``toDataURL`` and written to ``./data``.

        Returns:
            A ``(full_path, gap_path)`` tuple with the paths of the saved files.
        """
        # Base64 data of the full picture.
        full_js = "return document.getElementsByTagName('canvas')[1].toDataURL('image/png')"
        full_image = self.driver.execute_script(full_js)
        # Base64 data of the gap picture.
        gap_js = "return document.getElementsByTagName('canvas')[2].toDataURL('image/png')"
        gap_image = self.driver.execute_script(gap_js)
        # Target files.
        full_path = "./data/full_image.png"
        gap_path = "./data/gap_image.png"
        # Saving fails if the target directory is missing, so create it first.
        for path in (full_path, gap_path):
            os.makedirs(os.path.dirname(path), exist_ok=True)
        # Decode and save.
        base64_to_image(full_image, full_path)
        base64_to_image(gap_image, gap_path)
        return full_path, gap_path

    def match_gaps(self, full, pag):
        """Return the x offset at which the gap picture best matches the full picture.

        Three normalized template-matching methods are tried; the result of the
        one whose score is closest to a perfect match is used.

        Args:
            full: Path of the full captcha picture.
            pag: Path of the gap picture used as the template.

        Returns:
            The x coordinate (in pixels) of the best match location.

        Raises:
            FileNotFoundError: If either picture cannot be read.
        """
        # Both pictures are loaded with imread's default flags (color).
        img_full = cv2.imread(full)
        template = cv2.imread(pag)
        # cv2.imread reports failure by returning None instead of raising;
        # fail here with a clear message rather than inside matchTemplate.
        for path, image in ((full, img_full), (pag, template)):
            if image is None:
                raise FileNotFoundError("Cannot read image file: {}".format(path))
        methods = [cv2.TM_SQDIFF_NORMED, cv2.TM_CCORR_NORMED, cv2.TM_CCOEFF_NORMED]
        # Per method: x offset of its best match and how far its score is
        # from a perfect match (smaller is better).
        offsets = []
        scores = []
        for method in methods:
            res = cv2.matchTemplate(img_full, template, method)
            min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
            if method == cv2.TM_SQDIFF_NORMED:
                # Squared difference: the best match is the minimum, ideally 0.
                scores.append(min_val)
                offsets.append(min_loc[0])
            else:
                # Correlation methods: the best match is the maximum, ideally 1.
                scores.append(1.0 - max_val)
                offsets.append(max_loc[0])
        # First index holding the smallest score.
        index = min(range(len(scores)), key=scores.__getitem__)
        print("选用第 {:d} 个方法, 差为: {:f}, 距离为: {:d}".format(index + 1, scores[index], offsets[index]))
        return offsets[index]

    def sliding_track(self, distance):
        """Drag the slider button by ``distance`` pixels along a generated track.

        After the track has been replayed the pointer is moved 5 pixels back
        and 5 pixels forward again before the button is released.
        """
        slider = self.driver.find_element(By.CSS_SELECTOR, ".range-btn")
        ActionChains(self.driver).click_and_hold(slider).perform()
        track = self.get_track(distance)
        print(track)
        # Replay the track, then add the small back-and-forth wobble.
        for offset in track + [-5, 5]:
            ActionChains(self.driver).move_by_offset(xoffset=offset, yoffset=0).perform()
        ActionChains(self.driver).release().perform()

    def loop(self):
        """Solve the captcha, retrying while ``judge_show`` reports it as shown.

        Each attempt exports fresh pictures, locates the gap and drags the
        slider. Implemented as a loop so that many failed attempts cannot
        exhaust the recursion limit.
        """
        while True:
            full_img_path, gap_img_path = self.get_images()
            number = self.match_gaps(full_img_path, gap_img_path)
            self.sliding_track(number)
            if not self.judge_show():
                break


if __name__ == '__main__':
    try:
        # Create the page object (this starts the browser).
        sliding = Sliding()
        # Open the page and click through to the slider captcha.
        sliding.enter_page()
        # Solve the captcha, retrying until it is accepted.
        sliding.loop()
        # The browser is left open afterwards; call sliding.driver.quit()
        # to close it.
    except Exception as exc:
        # Catch Exception rather than everything so Ctrl+C still stops the
        # script, and show what actually went wrong.
        print("Error: {!r}".format(exc))