前几天搭建好了 TensorFlow 2 的环境,今天来试验一下神奇的机器学习。
先简单编写一个 Java 程序,收集了 10000 多张验证码图片,并全部进行了人工标注(训练素材点击下载),其中 600 多张用来检验预测结果。
先声明一下,验证码是从真实网站上爬取的,如下图,验证码中包含大写字母和数字,文字有旋转,背景和前景色是随机变化的,有斑点和曲线以及短划线干扰,字体看着有点像华文仿宋,大家可以根据是否对自己有用选择下载。
所有图片都已经经过人工标注,如以上验证码图片被标记为 2s6h-1586808953525-23a8680c-a80a-4deb-b1ca-a959c73a6c2f.jpg
使用以下代码进行训练。开始运行时要加载所有图片,时间有点长,大概 1~2 分钟,请耐心等待。
# coding:utf-8
import numpy as np
import tensorflow as tf
# Directory prefix under which the model is saved and re-loaded
SAVE_PATH = "E:/tensorflow/vcode-analyzer/"

# Captcha image width and height
IMAGE_WIDTH, IMAGE_HEIGHT = 130, 40

# Characters that may appear in a captcha (0, 1, I, O and Z are not included)
number = list("23456789")
ALPHABET = list("ABCDEFGHJKLMNPQRSTUVWXY")
CHAR_SET = [*number, *ALPHABET]
CHAR_SET_LEN = len(CHAR_SET)

# Number of characters in one captcha
MAX_CAPTCHA = 4

# Number of samples per training batch, and epochs run on each batch
BATCH_SIZE = 128
EPOCH = 4

# Set to 1 to print every mispredicted training sample after each batch
TRAIN_DEBUG = 0
def text2vec(text):
    """Encode a captcha label as a one-hot matrix.

    Args:
        text: Iterable of single characters, each either a UTF-8 encoded
            ``bytes`` object (as produced by ``tf.strings.bytes_split``)
            or a ``str``. Characters are upper-cased before lookup.

    Returns:
        A ``(MAX_CAPTCHA, CHAR_SET_LEN)`` float array with a 1.0 in row
        ``i`` at the ``CHAR_SET`` index of the i-th character. If a
        character is not in ``CHAR_SET`` or the label is longer than
        ``MAX_CAPTCHA``, the problem is reported on stdout and the rows
        filled so far are returned (remaining rows stay zero).
    """
    def _to_char(c, errors="strict"):
        # Labels arrive as bytes from TensorFlow; plain str is accepted too.
        if isinstance(c, (bytes, bytearray)):
            c = c.decode("utf-8", errors)
        return str(c).upper()

    vector = np.zeros([MAX_CAPTCHA, CHAR_SET_LEN])
    try:
        for i, c in enumerate(text):
            idx = CHAR_SET.index(_to_char(c))
            vector[i][idx] = 1.0
    except (ValueError, IndexError):
        # ValueError: unknown character or undecodable bytes;
        # IndexError: label longer than MAX_CAPTCHA.
        # Build the message from whatever characters exist so that reporting
        # a short or malformed label cannot raise a second exception.
        print("exception: " + "".join(_to_char(c, "replace") for c in text))
    return vector
def vec2text(vec):
    """Turn a sequence of ``CHAR_SET`` indices back into the captcha string.

    Args:
        vec: Iterable of integer indices into ``CHAR_SET``, one per character.

    Returns:
        The characters at those indices, concatenated in order.
    """
    return "".join(CHAR_SET[char_index] for char_index in vec)
# Module-level dataset state shared by load_all_train_data() and get_next_batch():
# the loaded images, their one-hot labels (same order), and the read cursor.
train_images, train_texts = [], []
current_index = 0
def load_all_train_data(image_path):
    """Load every ``*.jpg`` in *image_path* into the module-level dataset.

    The first ``MAX_CAPTCHA`` bytes of each file name are taken as the
    captcha label. Images are decoded and converted to single-channel
    grayscale tensors. Results are stored in the globals ``train_images``
    and ``train_texts`` (one-hot labels from ``text2vec``), in matching order.

    Any previously loaded data is discarded first and the batch cursor is
    reset, so calling this for a second directory (e.g. ``predict`` after
    ``train``) does not mix the two image sets.

    Args:
        image_path: Directory containing the images; a trailing path
            separator is optional.
    """
    global current_index
    import os  # local import so this block does not depend on file-level imports

    def _load_image(image_file):
        raw = tf.io.read_file(image_file)
        # channels=3 forces RGB output, so rgb_to_grayscale also works for
        # JPEG files that are stored as grayscale.
        rgb = tf.image.decode_jpeg(raw, channels=3)
        return tf.image.rgb_to_grayscale(rgb)

    # Clear in place so existing references to the lists stay valid.
    train_images.clear()
    train_texts.clear()
    current_index = 0

    pattern = os.path.join(image_path, '*.jpg')
    for file_path in tf.data.Dataset.list_files(pattern):
        # Take the last path component, accepting both '\\' and '/' separators.
        file_name = file_path.numpy().replace(b'\\', b'/').rsplit(b'/', 1)[-1]
        label = file_name[:MAX_CAPTCHA]
        train_texts.append(text2vec(tf.strings.bytes_split(label).numpy()))
        train_images.append(_load_image(file_path))
def get_next_batch(batch_size=128):
    """Return the next *batch_size* samples from the loaded dataset.

    Samples are read sequentially from the globals ``train_images`` and
    ``train_texts`` starting at ``current_index``; the cursor wraps around
    to the beginning when it reaches the end, and is advanced as a side
    effect.

    Args:
        batch_size: Number of samples to return.

    Returns:
        A tuple ``(batch_x, batch_y)`` of float arrays with shapes
        ``(batch_size, IMAGE_HEIGHT, IMAGE_WIDTH, 1)`` and
        ``(batch_size, MAX_CAPTCHA, CHAR_SET_LEN)``.

    Raises:
        ValueError: If no data has been loaded yet.
    """
    global current_index

    total = len(train_images)
    if total == 0:
        raise ValueError(
            "no images loaded; call load_all_train_data() before get_next_batch()")

    batch_x = np.zeros([batch_size, IMAGE_HEIGHT, IMAGE_WIDTH, 1])
    batch_y = np.zeros([batch_size, MAX_CAPTCHA, CHAR_SET_LEN])
    for i in range(batch_size):
        # Modulo keeps the cursor valid even if it is stale (e.g. the dataset
        # shrank since the last call); a negative cursor restarts at 0.
        current_index = max(current_index, 0) % total
        batch_x[i, :] = train_images[current_index]
        batch_y[i, :] = train_texts[current_index]
        current_index += 1
    return batch_x, batch_y
def crack_captcha_cnn():
    """Build the (uncompiled) Keras CNN used to recognise captchas.

    The network is three Conv2D + PReLU + pooling stages followed by a
    dense layer whose output is reshaped to ``(MAX_CAPTCHA, CHAR_SET_LEN)``
    and passed through a softmax, i.e. one distribution over ``CHAR_SET``
    per captcha position.

    Returns:
        A ``tf.keras.Sequential`` model.
    """
    layers = tf.keras.layers
    return tf.keras.Sequential([
        # Stage 1: 2x2 convolution, average pooling
        layers.Conv2D(32, (2, 2)),
        layers.PReLU(),
        layers.AveragePooling2D((2, 2)),
        # Stage 2: 3x3 convolution, max pooling
        layers.Conv2D(64, (3, 3)),
        layers.PReLU(),
        layers.MaxPool2D((2, 2)),
        # Stage 3: 5x5 convolution, max pooling
        layers.Conv2D(128, (5, 5)),
        layers.PReLU(),
        layers.MaxPool2D((2, 2)),
        # Classification head: one softmax per captcha position
        layers.Flatten(),
        layers.Dense(MAX_CAPTCHA * CHAR_SET_LEN),
        layers.Reshape([MAX_CAPTCHA, CHAR_SET_LEN]),
        layers.Softmax(),
    ])
def train(pat):
    """Train the captcha model on the images found under *pat*.

    Loads the images, resumes from the model saved at ``SAVE_PATH + 'model'``
    when it can be loaded (otherwise builds a fresh network), then fits the
    model on successive batches for 500000 iterations. The model is saved
    every 50 iterations, starting with iteration 0.

    Args:
        pat: Directory of labelled training images, passed to
            ``load_all_train_data``.
    """
    load_all_train_data(pat)

    model_location = SAVE_PATH + 'model'
    try:
        model = tf.keras.models.load_model(model_location)
    except Exception as e:
        # No usable saved model: report why and start from scratch.
        print('#######Exception', e)
        model = crack_captcha_cnn()

    model.compile(
        optimizer='Adam',
        metrics=['accuracy'],
        loss='categorical_crossentropy',
    )

    for times in range(500000):
        batch_x, batch_y = get_next_batch(BATCH_SIZE)
        print('times=', times, ' batch_x.shape=',
              batch_x.shape, ' batch_y.shape=', batch_y.shape)
        model.fit(batch_x, batch_y, epochs=EPOCH)

        if TRAIN_DEBUG == 1:
            # Compare the per-position argmax of prediction and label and
            # print every sample of this batch that is still wrong.
            predicted = np.argmax(model.predict(batch_x), axis=2)
            expected = np.argmax(batch_y, axis=2)
            for guess, truth in zip(predicted, expected):
                if not (guess == truth).all():
                    print("TRAIN_DEBUG: 实际 " + vec2text(truth) + ", 预测 " + vec2text(guess))

        if times % 50 == 0:
            print("save model at times=", times)
            model.save(model_location)
def predict(pat):
    """Evaluate the saved model on the images found under *pat*.

    Loads the images and the model stored at ``SAVE_PATH + 'model'``, runs
    1000 single-image predictions drawn sequentially via ``get_next_batch``,
    prints each prediction next to the true label, and finally prints the
    fraction of fully correct captchas.

    Args:
        pat: Directory of labelled images, passed to ``load_all_train_data``.
    """
    load_all_train_data(pat)
    model = tf.keras.models.load_model(SAVE_PATH + 'model')

    count = 1000
    success = 0
    for _ in range(count):
        data_x, data_y = get_next_batch(1)
        guessed = vec2text(np.argmax(model.predict(data_x), axis=2)[0])
        actual = vec2text(np.argmax(data_y, axis=2)[0])
        matched = actual.upper() == guessed.upper()
        print("y预测=", guessed, "y实际=", actual,
              "预测成功。" if matched else "预测失败。")
        if matched:
            success += 1

    print("预测", count, "次", "成功率 =", success / count)
if __name__ == "__main__":
    # Directories holding the training and the prediction image sets.
    # Every image must be named like 222s-xxxxxxxxxxxx.jpg, where "222s" is
    # the correct captcha text and the x's are arbitrary characters.
    train_dir = "D:\\projects\\vcode\\imgs-train\\"
    coded_dir = "D:\\projects\\vcode\\imgs-coded\\"

    train(train_dir)
    #train(coded_dir)
    predict(coded_dir)
收敛得很快,大概几百次循环后 loss 就降到了 0.1 以下,accuracy 持续稳定在 0.9 以上。
save model at times= 300
times= 301 batch_x.shape= (128, 40, 130, 1) batch_y.shape= (128, 4, 31)
Train on 128 samples
Epoch 1/4
128/128 [==============================] - 0s 656us/sample - loss: 0.1081 - accuracy: 0.9805
Epoch 2/4
128/128 [==============================] - 0s 593us/sample - loss: 0.0164 - accuracy: 0.9941
Epoch 3/4
128/128 [==============================] - 0s 617us/sample - loss: 0.0134 - accuracy: 0.9961
Epoch 4/4
128/128 [==============================] - 0s 570us/sample - loss: 0.0012 - accuracy: 1.0000
times= 302 batch_x.shape= (128, 40, 130, 1) batch_y.shape= (128, 4, 31)
Train on 128 samples
350次训练后,切换到测试集进行预测
y预测= AX74 y实际= AX74 预测成功。
y预测= UWCB y实际= UWCB 预测成功。
y预测= 6YKS y实际= 6YKS 预测成功。
y预测= DLUT y实际= DLUT 预测成功。
y预测= RK86 y实际= RK86 预测成功。
y预测= YMFV y实际= YMFV 预测成功。
y预测= 9F3K y实际= 9E3K 预测失败。
y预测= JY5D y实际= JY5D 预测成功。
y预测= WT93 y实际= HT93 预测失败。
y预测= G9NG y实际= G9NG 预测成功。
预测 1000 次 成功率 = 0.76
76%的正确率,还不错!要知道,除去加载图片的时间,实际训练的时间还不到1分钟……
可以查看后一篇:发布验证码图片识别服务