1 介绍
本文介绍如何利用 pytesseract 的 image_to_string() 函数识别验证码。调用该函数之前，需要先对图像进行去噪和二值化处理。完整代码如下。
2 代码
import cv2
import numpy as np
import matplotlib.pylab as plt
import pytesseract
from PIL import Image
# Noise-point removal
def interference_point(img_path):
    """Load an image as grayscale and whiten its border and isolated pixels.

    The image is read with ``cv2.imread`` and converted to grayscale. It is
    then scanned column by column (rows inside each column):

    * every pixel on the outer border is set to 255;
    * every interior pixel with more than two of its four direct neighbours
      (left, right, up, down) equal to 255 is set to 255.

    The pass works in place, so pixels whitened earlier in the scan take part
    in the neighbour counts of pixels visited later.

    Args:
        img_path: Path of the image file to load.

    Returns:
        The processed grayscale image as returned by ``cv2.cvtColor``.

    Raises:
        OSError: If ``cv2.imread`` could not load ``img_path``.
    """
    img_source = cv2.imread(img_path)
    if img_source is None:
        # cv2.imread reports failure by returning None rather than raising;
        # fail here with a clear message instead of an AttributeError below.
        raise OSError("cannot read image: {!r}".format(img_path))

    height, width = img_source.shape[:2]
    # Convert to grayscale
    gray = cv2.cvtColor(img_source, cv2.COLOR_BGR2GRAY)

    last_row = height - 1
    last_col = width - 1
    # Visit columns in the outer loop and rows in the inner loop; the order
    # matters because the image is modified while it is being scanned.
    for col in range(width):
        for row in range(height):
            # Clear the points lying on the image border.
            if col == 0 or col == last_col or row == 0 or row == last_row:
                gray[row, col] = 255
                continue

            neighbours = (
                gray[row, col - 1],
                gray[row, col + 1],
                gray[row - 1, col],
                gray[row + 1, col],
            )
            white_count = sum(1 for value in neighbours if value == 255)
            # Mostly surrounded by white: treat the pixel as noise.
            if white_count > 2:
                gray[row, col] = 255
    return gray
def binary(image):
    """Binarise ``image`` with ``cv2.threshold`` and return the result image.

    The call uses a threshold argument of 0, a maximum value of 255 and the
    combined ``THRESH_BINARY | THRESH_TRIANGLE`` flags. ``cv2.threshold``
    returns a pair; only its second element (the image) is returned.
    """
    # NOTE(review): with THRESH_TRIANGLE OpenCV is documented to choose the
    # threshold itself, so the literal 0 is presumably ignored - confirm.
    mode = cv2.THRESH_BINARY | cv2.THRESH_TRIANGLE
    _, thresholded = cv2.threshold(image, 0, 255, mode)
    return thresholded
def pixel_invert(image):
    """Return an inverted black/white copy of ``image``.

    Every pixel whose value is greater than 200 becomes 0; every other pixel
    becomes 255.

    Args:
        image: 2-D grayscale pixel values, either a NumPy array or nested
            sequences.

    Returns:
        A new ``numpy.ndarray`` of dtype ``uint8`` with the same shape as
        ``image``. The input is not modified.
    """
    pixels = np.asarray(image)
    # One vectorised select replaces the per-pixel Python loop. The explicit
    # uint8 cast keeps the result an 8-bit image; an array built from Python
    # ints would instead get a platform-dependent int32/int64 dtype.
    return np.where(pixels > 200, 0, 255).astype(np.uint8)
def image_binary(image_path):
    """Run the whole recognition pipeline on one image file.

    The image at ``image_path`` is passed through ``interference_point``,
    ``binary`` and ``pixel_invert`` in that order, and the result is handed
    to ``pytesseract.image_to_string``.

    Args:
        image_path: Path of the image file to recognise.

    Returns:
        Whatever ``pytesseract.image_to_string`` returns for the processed
        image.
    """
    cleaned = interference_point(image_path)
    inverted = pixel_invert(binary(cleaned))
    return pytesseract.image_to_string(inverted)
if __name__ == '__main__':
    # Run the pipeline on '1.jpg' (relative path) and print the result.
    # Earlier debugging snippet, left disabled:
    # img = cv2.imread('E:/20190701/G/095632/images/cut_0_116.jpg')
    # cv2.imshow('s',img)
    print(image_binary('1.jpg'))
3 效果展示
识别结果: