python：验证码识别

最新推荐文章于 2023-05-05 15:52:06 发布

Robin Long 2018

最新推荐文章于 2023-05-05 15:52:06 发布

阅读量604

点赞数 2

分类专栏：图像处理　文章标签：OpenCV

本文链接：https://blog.csdn.net/u013419318/article/details/102545735

版权

图像处理专栏收录该内容

29 篇文章 2 订阅

订阅专栏

1 介绍

本文介绍如何利用 pytesseract 中的 image_to_string() 函数识别验证码。在调用该函数之前，需要先对图像进行去噪、二值化和黑白反转处理。完整代码如下。

2 代码

import cv2
import numpy as np
import matplotlib.pylab as plt
import pytesseract
from PIL import Image

# Noise removal
def interference_point(img_path):
    """Load an image, convert it to grayscale and whiten isolated pixels.

    Every pixel on the outer border is set to 255.  Every interior pixel
    that has more than two of its four direct neighbours (left, right,
    above, below) equal to 255 is also set to 255.

    The image is modified in place while it is being scanned, column by
    column, so pixels whitened earlier in the scan count as white
    neighbours for pixels visited later.

    Args:
        img_path: Path of the image file, passed to ``cv2.imread``.

    Returns:
        The processed single-channel grayscale image.

    Raises:
        OSError: If ``cv2.imread`` could not load ``img_path``.
    """
    loaded = cv2.imread(img_path)
    # cv2.imread signals failure by returning None instead of raising.
    if loaded is None:
        raise OSError("cv2.imread could not load image: {!r}".format(img_path))

    height, width = loaded.shape[:2]
    # Convert to grayscale.
    gray = cv2.cvtColor(loaded, cv2.COLOR_BGR2GRAY)

    last_row = height - 1
    last_col = width - 1
    # Keep the column-major scan order: the in-place updates make the
    # result depend on the order in which pixels are visited.
    for col in range(width):
        for row in range(height):
            # Clear every pixel that lies on the image border.
            if col == 0 or col == last_col or row == 0 or row == last_row:
                gray[row, col] = 255
                continue
            neighbours = (
                gray[row, col - 1],
                gray[row, col + 1],
                gray[row - 1, col],
                gray[row + 1, col],
            )
            white_neighbours = sum(1 for value in neighbours if value == 255)
            if white_neighbours > 2:
                gray[row, col] = 255
    return gray

def binary(image):
    """Binarise ``image`` with ``cv2.threshold`` and return the result.

    The call uses a maximum value of 255 and combines the
    ``THRESH_BINARY`` and ``THRESH_TRIANGLE`` flags.
    """
    # Binarisation; the threshold value reported by OpenCV is not needed.
    threshold_flags = cv2.THRESH_BINARY | cv2.THRESH_TRIANGLE
    _used_threshold, binarized = cv2.threshold(image, 0, 255, threshold_flags)
    return binarized

def pixel_invert(image):
    """Return an inverted black/white copy of a 2-D image.

    Pixels whose value is greater than 200 become 0; all other pixels
    become 255.  The input is not modified.

    Args:
        image: 2-D array (or nested sequence) of pixel values.

    Returns:
        A new ``numpy.ndarray`` with the same shape as ``image`` that
        contains only the values 0 and 255.
    """
    # One vectorised comparison replaces the per-pixel Python loop.
    bright = np.asarray(image) > 200
    return np.where(bright, 0, 255)

def image_binary(image_path):
    """Run the full captcha pipeline on an image file and return the text.

    The image is denoised with ``interference_point``, binarised with
    ``binary``, inverted with ``pixel_invert`` and finally passed to
    ``pytesseract.image_to_string``.

    Args:
        image_path: Path of the captcha image file.

    Returns:
        The string produced by ``pytesseract.image_to_string``.
    """
    denoised = interference_point(image_path)
    binarized = binary(denoised)
    inverted = pixel_invert(binarized)
    # pixel_invert yields an array of NumPy's default integer type, which
    # is 64-bit on most platforms.  pytesseract converts arrays through
    # PIL.Image.fromarray, which does not support 64-bit integer data, so
    # hand over 8-bit data instead.  The values are only 0 and 255, so the
    # cast is lossless.
    ocr_input = np.asarray(inverted, dtype=np.uint8)
    return pytesseract.image_to_string(ocr_input)

if __name__ == '__main__':
    # Demo entry point: recognise the captcha stored in '1.jpg' (resolved
    # against the current working directory) and print the recognised text.
    print(image_binary('1.jpg'))