python_字母/数字验证码获取

檬柠wan

已于 2023-06-10 22:14:36 修改

阅读量755

点赞数 2

分类专栏： # web篇　文章标签： python chrome 开发语言

于 2023-01-12 10:38:16 首次发布

本文链接：https://blog.csdn.net/qq_45664055/article/details/128650414

版权

web篇专栏收录该内容

7 篇文章 4 订阅

订阅专栏

本文介绍了如何利用 Python 的 ddddocr 库结合 selenium 的 Chrome WebDriver，在自动化登录网站时识别验证码：先截取网页中的验证码图片，再用 ddddocr 进行识别，即可自动处理数字/字母验证码。

摘要由CSDN通过智能技术生成

前言

在自动化登录网站时，输入用户名和密码之后往往还需要填写验证码。本文介绍一款通用的验证码识别 OCR 库——ddddocr。

一、安装

pip install ddddocr

二、使用ddddocr

获取数字/字母验证码

在这里插入图片描述

方法一（先对整个页面截图，再用 PIL 裁剪出验证码区域）

import os
import ddddocr
from PIL import Image
from selenium import webdriver
from selenium.webdriver.common.by import By


class GetVerificationCode:
    """Read a text captcha from a web page by cropping a full-page screenshot.

    A Chrome WebDriver session is opened on construction. ``getverification``
    screenshots the page, crops the captcha element out of it with Pillow and
    passes the cropped image to ddddocr. The recognized text is stored on
    ``self.res``.
    """

    def __init__(self):
        # Start a Chrome WebDriver session.
        self.drive = webdriver.Chrome()
        # Maximize the browser window.
        self.drive.maximize_window()
        # Implicit wait: maximum time (seconds) applied to element lookups only.
        self.drive.implicitly_wait(2)

    @staticmethod
    def _artifact_path(filename):
        """Return an absolute path for *filename* in this script's directory."""
        # abspath() first: dirname(__file__) can be '' when the script is
        # launched by a bare relative name, and joining with os.path.join keeps
        # the path valid on every OS (the old '\\' separator was Windows-only).
        return os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)

    @staticmethod
    def _crop_box(origin, size):
        """Return the (left, upper, right, lower) box for Pillow's ``crop``.

        *origin* is a mapping with ``x``/``y`` and *size* a mapping with
        ``width``/``height``, as returned by a WebElement's ``location`` and
        ``size``. Each edge is truncated to an integer.
        """
        left = origin['x']
        top = origin['y']
        return (
            int(left),
            int(top),
            int(left + size['width']),
            int(top + size['height']),
        )

    def getverification(self, location):
        """Recognize the captcha shown by the element matching *location*.

        Args:
            location: CSS selector of the captcha element.

        Returns:
            The text recognized by ddddocr (also stored in ``self.res``).

        Raises:
            OSError: if the page screenshot could not be written.
        """
        screenshot_path = self._artifact_path('printscreen.png')
        captcha_path = self._artifact_path('code.png')

        # Locate the captcha first so the implicit wait has elapsed (and the
        # element exists) before the page is captured.
        element = self.drive.find_element(By.CSS_SELECTOR, location)

        # save_screenshot() reports I/O failure by returning False; without
        # this check a stale printscreen.png from an earlier run would be used.
        if not self.drive.save_screenshot(screenshot_path):
            raise OSError(f'could not write screenshot to {screenshot_path}')

        # NOTE(review): element coordinates are presumably CSS pixels; if the
        # screenshot is scaled (devicePixelRatio != 1) or the page is scrolled,
        # the crop may be offset - verify on the target machine.
        box = self._crop_box(element.location, element.size)

        # Crop the captcha out of the page screenshot and save it as code.png.
        with Image.open(screenshot_path) as page:
            page.crop(box).convert('RGB').save(captcha_path)

        # Run OCR on the cropped captcha image.
        ocr = ddddocr.DdddOcr()
        with open(captcha_path, 'rb') as f:
            img_bytes = f.read()
        self.res = ocr.classification(img_bytes)
        print('识别出的验证码为：' + self.res)
        return self.res

    def login(self):
        """Open the target site and read its captcha."""
        # Placeholder URL: replace with the address of the site to log in to.
        self.drive.get('需要获取验证码的网站')
        # '.bgw' is the CSS selector of the captcha element on that page.
        self.getverification('.bgw')


if __name__ == '__main__':
    run = GetVerificationCode()
    try:
        run.login()
    finally:
        # Always end the WebDriver session, even if login() raises, so the
        # browser and driver process do not linger.
        run.drive.quit()

方法二（直接对验证码元素截图，无需裁剪）

import os
import ddddocr
from selenium import webdriver
from selenium.webdriver.common.by import By


class GetVerificationCode:
    """Read a text captcha from a web page by screenshotting its element.

    A Chrome WebDriver session is opened on construction. ``getVerification``
    saves a screenshot of the captcha element and passes it to ddddocr. The
    recognized text is stored on ``self.res``.
    """

    def __init__(self):
        # Start a Chrome WebDriver session.
        self.drive = webdriver.Chrome()
        # Maximize the browser window.
        self.drive.maximize_window()
        # Implicit wait: maximum time (seconds) applied to element lookups only.
        self.drive.implicitly_wait(2)

    @staticmethod
    def _artifact_path(filename):
        """Return an absolute path for *filename* in this script's directory."""
        # abspath() first: dirname(__file__) can be '' when the script is
        # launched by a bare relative name, and joining with os.path.join keeps
        # the path valid on every OS (the old '\\' separator was Windows-only).
        return os.path.join(os.path.dirname(os.path.abspath(__file__)), filename)

    def getVerification(self, location):
        """Recognize the captcha shown by the element matching *location*.

        Args:
            location: CSS selector of the captcha element.

        Returns:
            The text recognized by ddddocr (also stored in ``self.res``).

        Raises:
            OSError: if the element screenshot could not be written.
        """
        captcha_path = self._artifact_path('code.png')

        # Locate the captcha element.
        element = self.drive.find_element(By.CSS_SELECTOR, location)

        # Screenshot just that element. screenshot() reports I/O failure by
        # returning False; without this check a stale code.png would be read.
        if not element.screenshot(captcha_path):
            raise OSError(f'could not write screenshot to {captcha_path}')

        # Run OCR on the saved captcha image.
        ocr = ddddocr.DdddOcr()
        with open(captcha_path, 'rb') as f:
            img_bytes = f.read()
        self.res = ocr.classification(img_bytes)
        print('识别出的验证码为：' + self.res)
        return self.res

    def login(self):
        """Open the target site and read its captcha."""
        # Placeholder URL: replace with the address of the site to log in to.
        self.drive.get('需要获取验证码的网站')
        # '.bgw' is the CSS selector of the captcha element on that page.
        self.getVerification('.bgw')


if __name__ == '__main__':
    run = GetVerificationCode()
    try:
        run.login()
    finally:
        # Always end the WebDriver session, even if login() raises, so the
        # browser and driver process do not linger.
        run.drive.quit()