参考大佬的文章:《用Python识别验证码_python 自动输入验证码》(CSDN博客)
修修补补的结果,直接贴源码吧
数字图片识别的源码(需要建立在模型训练的基础上):
import cv2
import time
import os
import numpy as np
import shutil
# Make sure the working folder for the split character images exists.
# exist_ok=True makes this a no-op when the folder is already there.
os.makedirs('char', exist_ok=True)
# Load the captcha image that should be recognised.
image_path = r'D:\\imgcode104.jfif'
im = cv2.imread(image_path)
if im is None:
    # cv2.imread reports a missing or undecodable file by returning None
    # instead of raising, so fail here with a readable message.
    raise OSError("cannot read image: {}".format(image_path))
# Convert to grayscale.
im_gray = cv2.cvtColor(im, cv2.COLOR_BGR2GRAY)
# Inverse binarisation: pixels brighter than 127 become 0, the rest 255.
ret, im_inv = cv2.threshold(im_gray, 127, 255, cv2.THRESH_BINARY_INV)
# Smooth with a normalised 3x3 Gaussian-style kernel (weights sum to 1).
kernel = 1 / 16 * np.array([[1, 2, 1], [2, 4, 2], [1, 2, 1]])
im_blur = cv2.filter2D(im_inv, -1, kernel)
# Binarise again after the blur.
ret, im_res = cv2.threshold(im_blur, 127, 255, cv2.THRESH_BINARY)
# Extract only the outermost contours.
contours, hierarchy = cv2.findContours(
    im_res, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
)
# Turn every contour into an axis-aligned rectangle described by its four
# corner points (top-left, top-right, bottom-right, bottom-left).
result = []
for contour in contours:
    x, y, w, h = cv2.boundingRect(contour)
    # np.int0 was removed in NumPy 2.0; np.intp is the type it aliased.
    box = np.array(
        [[x, y], [x + w, y], [x + w, y + h], [x, y + h]], dtype=np.intp
    )
    result.append(box)
print(result)
# After splitting, store every single character as its own image file and
# remember where each one came from.
char_images = []
for box in result:
    cv2.drawContours(im, [box], 0, (0, 0, 255), 2)
    # Rows run from the top-left y to the bottom-left y, columns from the
    # top-left x to the top-right x.
    roi = im_res[box[0][1]:box[3][1], box[0][0]:box[1][0]]
    # Normalise every character to 30x30 pixels.
    roistd = cv2.resize(roi, (30, 30))
    timestamp = int(time.time() * 1e6)
    filename = "{}.jpg".format(timestamp)
    filepath = os.path.join("char", filename)
    # Two iterations can observe the same clock value; bump the number
    # until the name is free so no character overwrites another one.
    while os.path.exists(filepath):
        timestamp += 1
        filename = "{}.jpg".format(timestamp)
        filepath = os.path.join("char", filename)
    cv2.imwrite(filepath, roistd)
    # Keep the box (for left-to-right ordering) together with the file path.
    char_image_info = {"box": box, "filepath": filepath}
    char_images.append(char_image_info)
# Load the hand-labelled training data from the "label" folder.
# File names look like "<timestamp>_<label>.jpg"; the label is the text
# after the last underscore of the stem.
filenames = os.listdir("label")
sample_rows = []
labels = []
for filename in filenames:
    filepath = os.path.join("label", filename)
    label_im = cv2.imread(filepath, cv2.IMREAD_GRAYSCALE)
    if label_im is None or label_im.size != 900:
        # Not a readable 30x30 image (e.g. a stray non-image file): skip it
        # so samples and labels stay aligned.
        print("skip unusable label file: {}".format(filepath))
        continue
    labels.append(filename.split(".")[0].split("_")[-1])
    sample_rows.append(label_im.reshape((1, 900)))
# Stack once at the end instead of re-copying the array on every append.
if sample_rows:
    samples = np.vstack(sample_rows).astype(np.float32)
else:
    samples = np.empty((0, 900), dtype=np.float32)
# Sorted so the label <-> id mapping is the same on every run.
unique_labels = sorted(set(labels))
unique_ids = list(range(len(unique_labels)))
label_id_map = dict(zip(unique_labels, unique_ids))
id_label_map = dict(zip(unique_ids, unique_labels))
label_ids = [label_id_map[label] for label in labels]
label_ids = np.array(label_ids).reshape((-1, 1)).astype(np.float32)
# Train a k-nearest-neighbours classifier on the labelled samples.
model = cv2.ml.KNearest_create()
model.train(samples, cv2.ml.ROW_SAMPLE, label_ids)


def _predict_char(info):
    """Return the predicted label for one saved character image."""
    pixels = cv2.imread(info["filepath"], cv2.IMREAD_GRAYSCALE)
    features = pixels.reshape((1, 900)).astype(np.float32)
    _, predicted, _, _ = model.findNearest(features, k=3)
    return id_label_map[int(predicted[0, 0])]


# Order the characters by the x coordinate of their top-left corner so the
# text reads left to right.
char_images.sort(key=lambda info: info["box"][0][0])
result_str = "".join(_predict_char(info) for info in char_images)
# Everything has been classified: remove the temporary char folder.
shutil.rmtree('char')
print(result_str)
然后是最费力的一步(模型训练的前提):人工标注样本。
简单来说就是多下载一些验证码图片到本地,然后运行这个demo,对弹出的每个字符图片按下对应的按键来录入识别结果(按Esc退出程序,按Enter跳过当前图片)。
我下载了100张,识别准确率大概有90%;按道理标注的样本越多识别越准确,数量大家自行决定。
import cv2 as cv
import time
import os
import glob
import sys
import numpy as np
def _save_char_images(image):
    """Split *image* into characters and write each one to the char folder."""
    # Edge-preserving filter to remove noise.
    blur = cv.pyrMeanShiftFiltering(image, sp=8, sr=60)
    # Grayscale.
    gray = cv.cvtColor(blur, cv.COLOR_BGR2GRAY)
    # Inverse binarisation: pixels brighter than 127 become 0, the rest 255.
    ret, binary = cv.threshold(gray, 127, 255, cv.THRESH_BINARY_INV)
    contours, hierarchy = cv.findContours(
        binary, cv.RETR_EXTERNAL, cv.CHAIN_APPROX_SIMPLE
    )
    result = []
    for contour in contours:
        x, y, w, h = cv.boundingRect(contour)
        # np.int0 was removed in NumPy 2.0; np.intp is the type it aliased.
        box = np.array(
            [[x, y], [x + w, y], [x + w, y + h], [x, y + h]], dtype=np.intp
        )
        result.append(box)
    print(result)
    for box in result:
        cv.drawContours(image, [box], 0, (0, 0, 255), 2)
        roi = binary[box[0][1]:box[3][1], box[0][0]:box[1][0]]
        # Normalise every character to 30x30 pixels.
        roistd = cv.resize(roi, (30, 30))
        # Files are named after a microsecond timestamp. Two iterations can
        # observe the same clock value, so bump the number until the name is
        # free; otherwise one character would overwrite another.
        timestamp = int(time.time() * 1e6)
        filepath = os.path.join("char", "{}.jpg".format(timestamp))
        while os.path.exists(filepath):
            timestamp += 1
            filepath = os.path.join("char", "{}.jpg".format(timestamp))
        cv.imwrite(filepath, roistd)


def _label_pending_chars():
    """Show every unlabelled char image and save it under the typed label."""
    for filename in os.listdir("char"):
        filename_ts = filename.split(".")[0]
        patt = "label/{}_*".format(filename_ts)
        # Any existing "<timestamp>_<label>" file means this one is done.
        if glob.glob(patt):
            print("{} done".format(patt))
            continue
        filepath = os.path.join("char", filename)
        im = cv.imread(filepath)
        if im is None:
            # Not a readable image; nothing to show or label.
            continue
        cv.imshow("image", im)
        # Keep only the low byte of the key code.
        key = cv.waitKey(0) & 0xFF
        if key == 27:  # Esc: stop labelling altogether
            sys.exit()
        if key == 13:  # Enter: skip this image
            continue
        char = chr(key)
        if not (char.isascii() and char.isalnum()):
            # The label is later parsed back out of "<timestamp>_<label>.jpg",
            # so characters such as "_", "." or path separators would corrupt
            # the file name or the parsed label.
            print("ignored key {!r}: not a letter or digit".format(char))
            continue
        outfile = "{}_{}.jpg".format(filename_ts, char)
        outpath = os.path.join("label", outfile)
        cv.imwrite(outpath, im)


def recognize_text(image):
    """Split a captcha image into characters and label them interactively.

    The image is denoised, binarised and cut along the bounding boxes of its
    outer contours; every crop is resized to 30x30 and written to the
    ``char`` folder. Afterwards each file in ``char`` that has no matching
    ``label/<timestamp>_*`` file is displayed, and the key pressed is used as
    its label (Esc exits the program, Enter skips the image).

    Args:
        image: BGR image array as returned by ``cv.imread``. The detected
            boxes are drawn onto it in place. ``None`` (an unreadable file)
            is reported and ignored.
    """
    if image is None:
        print("skip: image could not be read")
        return
    # cv.imwrite does not create missing directories, so make sure both
    # output folders exist before anything is written.
    os.makedirs("char", exist_ok=True)
    os.makedirs("label", exist_ok=True)
    _save_char_images(image)
    _label_pending_chars()
# Process the captcha images imgcode1.jfif through imgcode100.jfif.
for i in range(1, 101):
    src_path = r'D:\\imgcode' + str(i) + '.jfif'
    src = cv.imread(src_path)
    if src is None:
        # cv.imread returns None for a missing or undecodable file; skip it
        # instead of aborting the whole batch.
        print("skip unreadable image: {}".format(src_path))
        continue
    recognize_text(src)
# # Show the image
# cv.imshow("image", src)
# cv.waitKey(0)
#
# cv.destroyAllWindows()
最后是模型训练(源码和参考的一致):
上面的识别源码中已经整合进去了
import cv2 as cv
import os
import numpy as np
# Load the hand-labelled training data from the "label" folder.
# File names look like "<timestamp>_<label>.jpg"; the label is the text
# after the last underscore of the stem.
filenames = os.listdir("label")
sample_rows = []
labels = []
for filename in filenames:
    filepath = os.path.join("label", filename)
    im = cv.imread(filepath, cv.IMREAD_GRAYSCALE)
    if im is None or im.size != 900:
        # Not a readable 30x30 image (e.g. a stray non-image file): skip it
        # so samples and labels stay aligned.
        print("skip unusable label file: {}".format(filepath))
        continue
    labels.append(filename.split(".")[0].split("_")[-1])
    sample_rows.append(im.reshape((1, 900)))
# Stack once at the end instead of re-copying the array on every append.
if sample_rows:
    samples = np.vstack(sample_rows).astype(np.float32)
else:
    samples = np.empty((0, 900), dtype=np.float32)
# Sorted so the label <-> id mapping is the same on every run.
unique_labels = sorted(set(labels))
unique_ids = list(range(len(unique_labels)))
label_id_map = dict(zip(unique_labels, unique_ids))
id_label_map = dict(zip(unique_ids, unique_labels))
label_ids = [label_id_map[label] for label in labels]
label_ids = np.array(label_ids).reshape((-1, 1)).astype(np.float32)
# Train a k-nearest-neighbours classifier, one row per sample.
model = cv.ml.KNearest_create()
model.train(samples, cv.ml.ROW_SAMPLE, label_ids)
最后提一下:运行前记得提前创建label和char这两个文件夹(cv.imwrite不会自动创建目录,目录不存在时图片保存不上),新手很容易忽略这一点。