上一期我们已经制作好了通用模型,这一期将在它的基础上继续训练,制作微博验证码的识别模型。
首先,我们从微博获取验证码图片,并对这些图片打标签:把图片文件名改成验证码的内容,例如 ab3de.jpg(注意:打好标签的图片越多,生成的模型越好,识别率越高)。
废话不多说,我们看代码:
import numpy as np
from scipy import misc
from keras.applications.xception import Xception, preprocess_input
import glob
from keras.layers import Input, Dense, Dropout
from keras.models import Model
# Number of characters in one captcha; the network gets one output head per character.
images_split = 5
# Input image size as (height, width).
img_size = (60, 160)

# Xception backbone without its classification top and without pretrained weights;
# global average pooling turns the final feature map into one vector per image.
input_image = Input(shape=(img_size[0], img_size[1], 3))
base_model = Xception(input_tensor=input_image, weights=None, include_top=False, pooling='avg')

# One independent 36-way softmax head (each behind its own 50% dropout) per character.
predicts = []
for _ in range(images_split):
    classifier = Dense(36, activation='softmax')
    features = Dropout(0.5)(base_model.output)
    predicts.append(classifier(features))

model = Model(inputs=input_image, outputs=predicts)
# Labels are fed as integer class indices, hence the sparse cross-entropy loss.
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Start from the previously saved weights instead of a random initialisation.
model.load_weights('CaptchaForPython.h5')
# Captcha character -> class index: '0'-'9' map to 0-9 and 'A'-'Z' map to 10-35.
_CHAR_TO_CLASS = {ch: idx for idx, ch in enumerate('0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZ')}


def _label_from_path(path):
    """Return the captcha text stored in an image's file name.

    The label is the part of the base name before the first dot, so
    'weiboimgs/ab3de.jpg' gives 'ab3de'. Going through os.path.basename keeps
    this correct when the directory part contains dots (e.g. './weiboimgs/...')
    or when the platform uses a different path separator.
    """
    import os  # function-scope import so this block does not rely on the file's import section

    return os.path.basename(path).split('.', 1)[0]


def _encode_label(label):
    """Convert a captcha string into a list of class indices (case-insensitive).

    Raises:
        ValueError: if the label contains a character other than 0-9 / A-Z.
    """
    indices = []
    for ch in label:
        idx = _CHAR_TO_CLASS.get(ch.upper())
        if idx is None:
            raise ValueError('unsupported character %r in captcha label %r' % (ch, label))
        indices.append(idx)
    return indices


def data_generator(data, batch_size):
    """Endlessly yield training batches, loading images lazily to save memory.

    Args:
        data: sequence of image file paths; each file name (without extension)
            is the captcha text shown in the image.
        batch_size: number of images per batch. Paths are drawn with
            np.random.choice, i.e. randomly and with replacement.

    Yields:
        A tuple (x, targets): x is the batch of images after Xception's
        preprocess_input, and targets is a list of images_split arrays, one per
        character position, holding the class index of that character for
        every image in the batch.

    Raises:
        ValueError: if a file name does not encode exactly images_split
            characters, or contains a character outside 0-9 / A-Z.
    """
    while True:
        batch = np.random.choice(data, batch_size)
        x, y = [], []
        for img in batch:
            # NOTE: scipy.misc.imread / imresize were deprecated in SciPy 1.0 and removed
            # in later releases, so this script needs an older SciPy installation.
            im = misc.imread(img)
            im = im[:, :, :3]  # keep the first three channels only (drops alpha if present)
            im = misc.imresize(im, img_size)
            x.append(im)

            label = _label_from_path(img)
            if len(label) != images_split:
                raise ValueError('expected a %d-character label, got %r from file %r'
                                 % (images_split, label, str(img)))
            y.append(_encode_label(label))
        x = preprocess_input(np.array(x).astype(float))
        y = np.array(y)
        # Split the (batch, images_split) label matrix into one target vector per output head.
        yield x, [y[:, i] for i in range(images_split)]
# Collect every captcha image in the dataset directory and shuffle the list in place.
samples = glob.glob('weiboimgs/*.jpg')
np.random.shuffle(samples)

# The first nb_train shuffled files are used for training, the remainder for validation.
nb_train = 450
if len(samples) <= nb_train:
    # Without this check the validation generator would only fail after a full
    # training epoch, when it tries to sample from an empty list.
    raise ValueError('need more than %d images in weiboimgs/ to leave a validation split, '
                     'found %d' % (nb_train, len(samples)))
train_samples = samples[:nb_train]
test_samples = samples[nb_train:]

# Continue training the model that was initialised from the previously saved weights.
model.fit_generator(
    data_generator(train_samples, 30), steps_per_epoch=150, epochs=7,
    validation_data=data_generator(test_samples, 10), validation_steps=5)
model.save('weibo.h5')
这段代码执行完成后,就能得到真正的微博验证码识别模型(保存为 weibo.h5)了,大家快动手试试吧!