简易的验证码自动识别脚本

最新推荐文章于 2024-03-31 15:50:11 发布

小菜菜forever

最新推荐文章于 2024-03-31 15:50:11 发布

阅读量5.2k

点赞数

分类专栏：脚本

本文链接：https://blog.csdn.net/qq_37657182/article/details/103929659

版权

脚本专栏收录该内容

2 篇文章 0 订阅

订阅专栏

准备工作

Python 环境、Pillow 库、pytesseract 库

环境搭建

Python 环境和 Pillow 库的搭建不再赘述；需要注意的主要是 pytesseract 库的搭建，具体细节请参考：pytesseract库安装与使用

项目工程结构图如下：

在这里插入图片描述

最后附上Verify模块中的代码

#-*-coding:utf-8-*-
import os
from PIL import Image,ImageDraw
import pytesseract

class Recognizer(object):
    """Recognize simple captcha images with Pillow pre-processing and Tesseract.

    The pipeline driven by ``BeginRecogize`` is: binarize the image
    (``ToBlack``), copy its pixels into a coordinate-keyed dict
    (``twoValue``), whiten isolated pixels (``CleanNoise``), redraw the
    cleaned pixels and run OCR on them (``recognize_captcha``), then
    normalize the recognized text (``StandardOutput``).
    """

    def __init__(self):
        # NOTE: the directories are resolved from the current working
        # directory, not from the location of this file.
        self.ParentPath = os.path.abspath(os.path.join(os.getcwd(), ".."))  # parent directory
        self.DataPath = os.path.join(self.ParentPath, 'Data')               # Data directory
        self.ResultsPath = os.path.join(self.ParentPath, 'Results')         # Results directory
        self.BlackWhiteImage = None  # binarized image produced by ToBlack()
        # Extra command-line configuration handed to pytesseract
        # (machine-specific tessdata location).
        self.tessdata_dir_config = '--tessdata-dir "D:\\Tesseract-OCR\\tessdata"'
        self.t2val = {}  # maps (x, y) -> pixel value, filled by twoValue()

    def ToBlack(self, path, threshold=150):
        """Load an image and turn it into a thresholded black/white image.

        Pixels whose colour falls in one of two hard-coded RGB ranges are
        painted black first; the image is then converted to grayscale and
        binarized at ``threshold``.

        :param path: path of the captcha image file.
        :param threshold: gray level (0..256) at which the lookup table
            switches from 0 to 1; defaults to the original value of 150.
        :return: the binarized image, also stored in ``self.BlackWhiteImage``.
        :raises ValueError: if ``threshold`` is outside 0..256.
        """
        if not 0 <= threshold <= 256:
            raise ValueError("threshold must be between 0 and 256")

        # Force three-channel RGB so the (r, g, b) unpacking below also works
        # for RGBA, palette and grayscale files.
        img = Image.open(path).convert('RGB')
        w, h = img.size
        # Original author's note: this recolouring step does not seem to
        # help much.
        for x in range(w):
            for y in range(h):
                r, g, b = img.getpixel((x, y))
                in_first_range = 190 <= r <= 255 and 170 <= g <= 255 and 0 <= b <= 140
                in_second_range = 0 <= r <= 90 and 210 <= g <= 255 and 0 <= b <= 90
                if in_first_range or in_second_range:
                    img.putpixel((x, y), (0, 0, 0))

        # Grayscale first, then threshold through a 256-entry lookup table.
        lookup = [0] * threshold + [1] * (256 - threshold)
        img = img.convert('L').point(lookup, '1')
        self.BlackWhiteImage = img
        return img

    def twoValue(self, G=100):
        """Copy every pixel of ``self.BlackWhiteImage`` into ``self.t2val``.

        Values are stored exactly as ``getpixel`` returns them, keyed by
        ``(x, y)``.

        :param G: unused; kept so existing callers keep working.
        """
        width, height = self.BlackWhiteImage.size
        for y in range(height):
            for x in range(width):
                self.t2val[(x, y)] = self.BlackWhiteImage.getpixel((x, y))

    def CleanNoise(self, N=3, Z=2):
        """Whiten interior pixels that have too few equal-valued neighbours.

        A pixel is compared with its 8 neighbours; when fewer than ``N`` of
        them hold the same value, the pixel is treated as noise and set to 1.
        ``self.t2val`` is updated in place while scanning, so pixels already
        cleaned in a pass influence the ones visited after them.

        :param N: minimum number of equal neighbours for a pixel to be kept
            (the original author suggests 0 < N < 8 and typically 3).
        :param Z: number of de-noising passes.
        :return: None (the dict ``self.t2val`` is modified in place).
        """
        width, height = self.BlackWhiteImage.size
        neighbour_offsets = (
            (-1, -1), (-1, 0), (-1, 1),
            (0, -1), (0, 1),
            (1, -1), (1, 0), (1, 1),
        )
        for _ in range(Z):
            # Only these two corner pixels are forced to white; the other
            # border pixels are left untouched.
            self.t2val[(0, 0)] = 1
            self.t2val[(width - 1, height - 1)] = 1

            for x in range(1, width - 1):
                for y in range(1, height - 1):
                    current = self.t2val[(x, y)]
                    nearDots = sum(
                        1
                        for dx, dy in neighbour_offsets
                        if self.t2val[(x + dx, y + dy)] == current
                    )
                    if nearDots < N:
                        self.t2val[(x, y)] = 1  # author's convention: 1 = white, 0 = black

    def recognize_captcha(self):
        """Redraw the pixels held in ``self.t2val`` and run Tesseract on them.

        :return: the text returned by ``pytesseract.image_to_string``.
        """
        width, height = self.BlackWhiteImage.size
        image = Image.new("1", self.BlackWhiteImage.size)
        draw = ImageDraw.Draw(image)
        for x in range(width):
            for y in range(height):
                draw.point((x, y), self.t2val[(x, y)])
        return pytesseract.image_to_string(image, config=self.tessdata_dir_config)

    def BeginRecogize(self, path):
        """Run the whole pipeline on the image at ``path``.

        :param path: path of the captcha image file.
        :return: the recognized text with spaces removed, upper-cased.
        """
        self.ToBlack(path)
        self.twoValue()
        self.CleanNoise(N=3, Z=2)
        res = self.recognize_captcha()
        return self.StandardOutput(res)

    def StandardOutput(self, output):
        """Return ``str(output)`` with all space characters removed, upper-cased.

        Only the space character is removed; other whitespace is kept.
        """
        return str(output).replace(' ', '').upper()


if __name__ == '__main__':
    # Demo run: recognize the file 7.png inside the recognizer's Data
    # directory and print the recognized text.
    r1 = Recognizer()
    p1 = os.path.join(r1.DataPath, '7.png')
    res = r1.BeginRecogize(p1)
    # The parenthesized form prints the same text on Python 2 and Python 3,
    # whereas the bare ``print res`` statement is a syntax error on Python 3.
    print(res)

方法效果：

原图
在这里插入图片描述
识别结果：

但是，对于其他一些验证码，识别效果并不好，例如：

识别结果为：

总的来说，效果还有待提高，改进的重点应放在图像预处理算法上，其中去噪是很关键的一步。

小菜菜forever

关注

0
点赞
踩
8

收藏

觉得还不错? 一键收藏
0
评论
简易的验证码自动识别脚本

准备工作python环境，PILLOW库，pytesseract库环境搭建python环境和PILLOW库的搭建不再赘述，主要是pytesseract库搭建需要注意一下，具体搭建细节，参考：pytesseract库安装与使用项目工程结构图如下：最后附上Verify模块中的代码#-*-coding:utf-8-*-import osfrom PIL import Image,Ima...
复制链接

扫一扫