上期我们生成了微博验证码识别模型,这期我教大家怎么检验这个模型的好坏。检验之前,需要先找一批不在训练集中的微博验证码图片并打上标签(下面的代码从文件名中读取标签,例如 weiboimgs/AB12C.jpg)。
# -*- coding=utf-8 -*-
import glob
import os

import numpy as np
from keras.applications.xception import preprocess_input
from keras.models import load_model
from scipy import misc
# Target size handed to misc.imresize for every test image
# (scipy's tuple convention is (height, width)).
img_size = (60, 160)
# The captcha recognition model, loaded once when the module is imported.
model = load_model('weibo.h5')
# Class index -> character: '0'-'9' occupy 0-9 and 'A'-'Z' occupy 10-35.
letter_list = list('0123456789') + list('ABCDEFGHIJKLMNOPQRSTUVWXYZ')
# Character -> class index: digits '0'-'9' map to 0-9 and letters 'A'-'Z'
# map to 10-35.
_CHAR_TO_CLASS = {
    char: index
    for index, char in enumerate('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ')
}


def _label_from_path(path):
    """Return the captcha text encoded in an image file name."""
    # basename() copes with dots in directory names and with the platform's
    # path separator; the label is everything before the first dot of the
    # file name.
    return os.path.basename(str(path)).split('.')[0]


def _encode_label(label):
    """Convert captcha text into a list of class indices (case-insensitive)."""
    encoded = []
    for char in label:
        index = _CHAR_TO_CLASS.get(char.upper())
        if index is None:
            raise ValueError(
                'unsupported character %r in captcha label %r' % (char, label)
            )
        encoded.append(index)
    return encoded


def data_generator_test(data, n):  # sample generator, saves memory
    """Yield the model input and label for the single sample ``data[n]``.

    The generator never terminates: every ``next()`` re-reads the same image
    and yields the same sample again.

    :param data: sequence of image paths whose file name (up to the first
        dot) is the captcha text
    :param n: index of the sample inside ``data``
    :return: yields ``(x, y)``; ``x`` is the preprocessed image in a batch of
        one, ``y`` is a list of five arrays holding the class index of each
        of the first five label characters
    :raises ValueError: if the label contains a character other than
        0-9 / A-Z (case-insensitive)
    :raises IndexError: if ``n`` is out of range or the label is shorter than
        five characters
    """
    while True:
        path = data[n]
        # NOTE(review): scipy.misc.imread / imresize have been deprecated
        # since SciPy 1.0 and are absent from current releases; this script
        # needs a SciPy version that still ships them.
        im = misc.imread(path)
        im = im[:, :, :3]  # keep the first three channels only
        im = misc.imresize(im, img_size)
        # The image was uint8; convert to float before preprocessing.
        x = preprocess_input(np.array([im]).astype(float))
        y = np.array([_encode_label(_label_from_path(path))])
        yield x, [y[:, i] for i in range(5)]
# Paths of every labelled test captcha; the file name carries the label.
test_samples = list(glob.iglob(r'weiboimgs/*.jpg'))
def predict2(n):
    """Run the model on one test sample and decode prediction and label.

    :param n: index of the sample in ``test_samples``
    :return: ``(predicted, real)`` - the predicted captcha text and the text
        taken from the sample's label, both as strings
    """
    x, y = next(data_generator_test(test_samples, n))
    outputs = model.predict(x)
    # One argmax per model output, then transpose so that each row holds the
    # class indices of one sample.
    class_ids = np.array([out.argmax(axis=1) for out in outputs]).T
    predicted = ''.join(
        letter_list[j] for row in class_ids.tolist() for j in row
    )
    real = ''.join(letter_list[j] for position in y for j in position)
    return predicted, real
def run():
    """Evaluate the model on every test sample and print the results.

    For each sample prints a numbered banner, the (prediction, label) pair
    and both values again on labelled lines. After the last sample prints the
    number of samples, the number of exact matches and the accuracy.

    :return: None
    """
    total = len(test_samples)
    if total == 0:
        # Nothing matched the glob pattern; the accuracy would be 0 / 0.
        print('no test samples found')
        return
    n_right = 0
    for i in range(total):
        print('~~~~~~~~~~~~~%d~~~~~~~~~' % (i + 1))
        predict, real = predict2(i)
        print(predict, real)
        if real == predict:
            n_right += 1
        print('real:', real)
        print('predict:', predict)
    print(total, n_right, n_right / total)


if __name__ == '__main__':
    run()
模型预测就到这里了,下期讲如何在项目中使用该模型。