不少验证码是为了防止机器人(自动脚本)刷服务器而出现的。
大部分像这样:当然还有类似的
对于刷票....会有点难受
我写了一段代码来解决这个问题,分为两步:
1. 把验证码图片切分成单个字符,并按字符分类保存
2. 用这些字符图片训练模型,进行识别
#1:
import os
import os.path
import cv2
import glob
import imutils
CAPTCHA_IMAGES_PATH = "input_captcha_images"
LETTER_IMAGES_PATH = "output_letter_images"
# Number of characters each CAPTCHA is expected to contain; images whose
# segmentation yields a different number of regions are skipped.
EXPECTED_LETTER_COUNT = 5


def split_letter_box(x_axis, y_axis, wid, hig):
    """Return the letter regions covered by one contour bounding box.

    A box whose width/height ratio is above 1.25 is treated as two letters
    joined together and is split into a left and a right half. Any other box
    is returned unchanged as a single region.

    Args:
        x_axis: Left edge of the bounding box.
        y_axis: Top edge of the bounding box.
        wid: Width of the bounding box.
        hig: Height of the bounding box.

    Returns:
        A list with one or two ``(x, y, width, height)`` tuples.
    """
    if wid / hig > 1.25:
        half_width = int(wid / 2)
        return [
            (x_axis, y_axis, half_width, hig),
            (x_axis + half_width, y_axis, half_width, hig),
        ]
    return [(x_axis, y_axis, wid, hig)]


# List every CAPTCHA image in the input folder and process them one by one.
captcha_image_files = glob.glob(os.path.join(CAPTCHA_IMAGES_PATH, "*"))
# Per-letter running index used to number the saved crops.
counts = {}
for (x, captcha_image_file) in enumerate(captcha_image_files):
    print("[INFO] processing image {}/{}".format(x + 1, len(captcha_image_files)))

    # The file name without its extension is used as the CAPTCHA's text.
    filename = os.path.basename(captcha_image_file)
    captcha_correct_text = os.path.splitext(filename)[0]

    # Load the image, convert it to grayscale and add some padding around it
    # so that crops taken near the border stay inside the image.
    text_image = cv2.imread(captcha_image_file)
    if text_image is None:
        # cv2.imread returns None for files it cannot decode; skip those.
        continue
    text_to_gray = cv2.cvtColor(text_image, cv2.COLOR_BGR2GRAY)
    text_to_gray = cv2.copyMakeBorder(text_to_gray, 8, 8, 8, 8, cv2.BORDER_REPLICATE)

    # Convert to pure black and white, then find the outer contours.
    thresh = cv2.threshold(text_to_gray, 0, 255, cv2.THRESH_BINARY_INV | cv2.THRESH_OTSU)[1]
    found = cv2.findContours(thresh.copy(), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # OpenCV version compatibility: findContours returns either
    # (contours, hierarchy) or (image, contours, hierarchy).
    image_contours = found[0] if len(found) == 2 else found[1]

    # Collect the bounding rectangle(s) of every contour.
    letterImage_regions = []
    for contour in image_contours:
        (x_axis, y_axis, wid, hig) = cv2.boundingRect(contour)
        letterImage_regions.extend(split_letter_box(x_axis, y_axis, wid, hig))

    # Only keep CAPTCHAs that were segmented into the expected number of
    # letters; anything else would produce mislabelled training data.
    if len(letterImage_regions) != EXPECTED_LETTER_COUNT:
        continue

    # Sort left to right so that regions line up with the text characters.
    letterImage_regions = sorted(letterImage_regions, key=lambda region: region[0])

    # Save every letter as its own image.
    for letterboundingbox, letter_in_text in zip(letterImage_regions, captcha_correct_text):
        x_axis, y_axis, wid, hig = letterboundingbox
        # Keep a 2-pixel margin, clamped at 0 so a negative start index does
        # not wrap around and yield an empty or wrong crop.
        top = max(y_axis - 2, 0)
        left = max(x_axis - 2, 0)
        letter_in_image = text_to_gray[top:y_axis + hig + 2, left:x_axis + wid + 2]

        # Each letter gets its own output folder named after the character.
        save_p = os.path.join(LETTER_IMAGES_PATH, letter_in_text)
        os.makedirs(save_p, exist_ok=True)

        # Write the crop under a zero-padded running number for this letter.
        c = counts.get(letter_in_text, 1)
        letter_file = os.path.join(save_p, "{}.png".format(str(c).zfill(6)))
        cv2.imwrite(letter_file, letter_in_image)
        counts[letter_in_text] = c + 1
#2:
import os
import pickle

import cv2
import numpy as np
from imutils import paths
from keras.layers import Conv2D, MaxPooling2D
from keras.layers.core import Flatten, Dense
from keras.models import Sequential
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelBinarizer

from helpers import resize_to_fit
# Location of the training images and of the files produced by training.
LETTER_IMAGES_PATH = "output_letter_images"
MODEL = "captcha.hdf5"
MODEL_LABELS = "labels.dat"

# Lists that collect the preprocessed images and their labels.
dataimages = []
imagelabels = []

# Walk over every image file found under LETTER_IMAGES_PATH.
for image_file in paths.list_images(LETTER_IMAGES_PATH):
    # Read the image; cv2.imread returns None for files it cannot decode.
    text_image = cv2.imread(image_file)
    if text_image is None:
        continue
    # Convert to grayscale.
    text_image = cv2.cvtColor(text_image, cv2.COLOR_BGR2GRAY)
    # Resize to the 20x20 input size used by the model.
    text_image = resize_to_fit(text_image, 20, 20)
    # Add a trailing channel axis to match the model's input layout.
    text_image = np.expand_dims(text_image, axis=2)
    # The label is the name of the folder that contains the image.
    text_label = os.path.basename(os.path.dirname(image_file))
    dataimages.append(text_image)
    imagelabels.append(text_label)

# Scale pixel intensities into the [0, 1] range.
dataimages = np.array(dataimages, dtype="float") / 255.0
imagelabels = np.array(imagelabels)

# Split into training and test sets (80% / 20%, fixed seed).
(X_train_set, X_test_set, Y_train_set, Y_test_set) = train_test_split(
    dataimages, imagelabels, test_size=0.2, random_state=42
)

# One-hot encode the labels; the binarizer is fitted on the training labels.
lbzr = LabelBinarizer()
Y_train_set = lbzr.fit_transform(Y_train_set)
Y_test_set = lbzr.transform(Y_test_set)

# Persist the fitted label binarizer to MODEL_LABELS with pickle.
with open(MODEL_LABELS, "wb") as f:
    pickle.dump(lbzr, f)
# Build the network: one convolution, two max-pooling steps, then two dense
# layers ending in a softmax over the label classes.
nn_model = Sequential()
nn_model.add(Conv2D(20, (5, 5), padding="same", input_shape=(20, 20, 1), activation="relu"))
# NOTE(review): the two pooling layers are stacked back to back exactly as in
# the original listing; a second convolution between them may have been
# intended -- confirm.
nn_model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
nn_model.add(MaxPooling2D(pool_size=(2, 2), strides=(2, 2)))
nn_model.add(Flatten())
nn_model.add(Dense(500, activation="relu"))
# Size the output layer from the encoded labels instead of hard-coding 32, so
# it always matches the number of label columns being trained on.
num_classes = Y_train_set.shape[1]
nn_model.add(Dense(num_classes, activation="softmax"))

nn_model.compile(loss="categorical_crossentropy", optimizer="adam", metrics=["accuracy"])

# Train, using the held-out test set for validation after every epoch.
nn_model.fit(
    X_train_set,
    Y_train_set,
    validation_data=(X_test_set, Y_test_set),
    batch_size=32,
    epochs=10,
    verbose=1,
)

# Save the trained model to the MODEL path.
nn_model.save(MODEL)
以上就是主体代码。
测试代码自由设计。
如有错误请指出。