以下代码只是博主的学习笔记。为了稍稍符合读者需求,代码经过了一些修改,修改过程中可能引入了一些小 bug,还请见谅。
首先介绍一下 SVHN(Street View House Numbers)Dataset:它来源于谷歌街景门牌号码,这里提供一下地址。本代码只需要下载格式2中的 train_32x32.mat 和 test_32x32.mat 即可。格式2中所有数字均已调整为固定的 32 x 32 像素分辨率,原始字符边框在适当的尺寸上扩展为方形窗口,因此将它们的大小调整为 32 x 32 像素不会引起纵横比失真。然而,这种预处理会在目标数字的两侧引入一些干扰数字。加载 .mat 文件会创建 2 个变量:X 是包含图像的 4-D 矩阵,而 y 是类标签的向量。
话不多说,进入正题,下面我写了两份代码,分别是生成tfl模型和h5模型的,其中生成的tfl模型是对其他博主的源码稍稍修改后的复用,地址也放在这了
数据处理
数据集含有两个变量,X 代表图像。训练集 X 的 shape 是 (32,32,3,73257),也就是 (width, height, channels, samples);tensorflow 的张量需要 (samples, width, height, channels),所以需要转换一下。由于直接调用 CIFAR-10 的网络模型,数据只需要先做个归一化,所有像素除以 255 即可。另外原始数据中数字 0 的标签是 10,这里要转化成 0,并提供 one_hot 编码。
将下面的代码保存为一个 py 文件(例如 transData.py,后面的训练代码通过 import transData 调用它),以供后续训练代码调用
#!/usr/bin/env python2
# -*- coding: utf-8 -*-
import scipy.io as sio
import matplotlib.pyplot as plt
import numpy as np
# Side length in pixels of the square SVHN crops (32 x 32 per the notes above);
# not referenced by the functions visible in this file.
image_size = 32
# Number of digit classes; load_data() uses it as the width of one-hot labels.
num_labels = 10
def display_data():
    """Show the first ten training images in a 2 x 5 grid.

    Reads ``Data/train_32x32.mat`` and titles every subplot with the raw
    label stored in the file (the notes above say digit 0 is stored as 10,
    so such images are titled ``10`` here).
    """
    print('loading Matlab data...')
    train = sio.loadmat('Data/train_32x32.mat')
    data = train['X']
    label = train['y']
    for i in range(10):
        plt.subplot(2, 5, i + 1)
        plt.title(label[i][0])
        # The sample index is the last axis of X, so data[..., i] is one image.
        plt.imshow(data[..., i])
        plt.axis('off')
    # Show once, after the whole grid has been populated.
    plt.show()
def _load_split(mat_path, one_hot):
    """Load one SVHN ``.mat`` file and return ``(images, labels)``.

    Images are moved from samples-last to samples-first order and divided
    by 255; label value 10 is rewritten to 0.
    """
    mat = sio.loadmat(mat_path)
    # (d0, d1, d2, samples) -> (samples, d0, d1, d2); a single transpose is
    # equivalent to the swapaxes(0, 3), swapaxes(2, 3), swapaxes(1, 2) chain.
    images = np.transpose(mat['X'], (3, 0, 1, 2)) / 255.
    labels = mat['y'].copy()
    # Vectorised replacement for the per-element "10 -> 0" loop.
    labels[labels == 10] = 0
    if one_hot:
        # Broadcasting a (samples, 1) column against arange(num_labels)
        # yields a (samples, num_labels) indicator matrix.
        labels = (np.arange(num_labels) == labels.reshape(-1, 1)).astype(np.float32)
    return images, labels


def load_data(one_hot=False, data_dir='Data'):
    """Load the SVHN train and test splits.

    Args:
        one_hot: when true, labels are returned as float32 one-hot rows of
            width ``num_labels``; otherwise the label arrays keep the shape
            stored in the file, with 10 replaced by 0.
        data_dir: directory holding ``train_32x32.mat`` and
            ``test_32x32.mat``. Defaults to ``'Data'``, the location the
            original code hard-coded.

    Returns:
        ``(train_data, train_label, test_data, test_label)`` where the image
        arrays are samples-first and scaled by 1/255.
    """
    import os  # local import: the module header does not import os

    train_data, train_label = _load_split(
        os.path.join(data_dir, 'train_32x32.mat'), one_hot)
    test_data, test_label = _load_split(
        os.path.join(data_dir, 'test_32x32.mat'), one_hot)
    return train_data, train_label, test_data, test_label
if __name__ == '__main__':
    # Manual smoke test: load both splits with one-hot labels (the result is
    # discarded), then preview the first ten training images.
    load_data(one_hot=True)
    display_data()
tflearn训练及模型保存
'''
tflearn训练代码
'''
from __future__ import division, print_function, absolute_import
import tflearn
from tflearn.data_utils import shuffle, to_categorical
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d
from tflearn.layers.estimator import regression
from tflearn.data_preprocessing import ImagePreprocessing
from tflearn.data_augmentation import ImageAugmentation
# Data loading and preprocessing
import transData as SVHN # 这里换成你自己的数据处理的文件名称
import os  # only needed to prepare the checkpoint directory below

# Load both SVHN splits with one-hot labels and shuffle the training pairs.
X, Y, X_test, Y_test = SVHN.load_data(one_hot=True)
X, Y = shuffle(X, Y)

# Make sure the checkpoint directory exists *before* training starts, so a
# missing folder cannot make model.save() fail after all epochs have run.
model_path = "./models/model1/svhnCNN.tfl"
model_dir = os.path.dirname(model_path)
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

# Real-time data preprocessing: feature-wise zero-centering and std scaling,
# attached to the input layer below.
img_prep = ImagePreprocessing()
img_prep.add_featurewise_zero_center()
img_prep.add_featurewise_stdnorm()

# Convolutional network building: conv -> pool -> conv -> conv -> pool ->
# dense(512) -> dropout -> softmax over the 10 digit classes.
network = input_data(shape=[None, 32, 32, 3],
                     data_preprocessing=img_prep)
network = conv_2d(network, 32, 3, activation='relu')
network = max_pool_2d(network, 2)
network = conv_2d(network, 64, 3, activation='relu')
network = conv_2d(network, 64, 3, activation='relu')
network = max_pool_2d(network, 2)
network = fully_connected(network, 512, activation='relu')
network = dropout(network, 0.5)
network = fully_connected(network, 10, activation='softmax')
network = regression(network, optimizer='adam',
                     loss='categorical_crossentropy',
                     learning_rate=0.001)

# Train the classifier, validating on the test split, then save the weights.
model = tflearn.DNN(network, tensorboard_verbose=0)
model.fit(X, Y, n_epoch=15, shuffle=True, validation_set=(X_test, Y_test),
          show_metric=True, batch_size=96, run_id='svhn_cnn')
model.save(model_path)
keras训练及模型保存
'''
由tflearn训练代码修改的keras训练代码
'''
from __future__ import division, print_function, absolute_import
from tflearn.data_utils import shuffle
from tflearn.data_preprocessing import ImagePreprocessing
import keras
from keras import Sequential
from keras.layers import Conv2D, MaxPooling2D, Dropout, Flatten, Dense
import matplotlib.pyplot as plt
# 数据加载和预处理
import transData as SVHN # 这里换成你自己的数据处理的文件名称
import os  # only needed to prepare the output directory below

# Load both SVHN splits with one-hot labels and shuffle the training pairs.
X, Y, X_test, Y_test = SVHN.load_data(one_hot=True)
X, Y = shuffle(X, Y)

# Training hyperparameters.
epochs = 15
batch_size = 96
input_shape = (32, 32, 3)

# NOTE: a tflearn ImagePreprocessing object used to be built here, but it was
# never attached to the Keras model, so it had no effect and has been dropped.
# The only normalisation this model sees is the /255 scaling in load_data().

# The Keras loading code in these notes reads models/model2/svhnCNN.h5, so
# the trained model is written there. The directory is created up front so a
# missing folder cannot make the save fail after training has finished.
model_path = "models/model2/svhnCNN.h5"
model_dir = os.path.dirname(model_path)
if not os.path.isdir(model_dir):
    os.makedirs(model_dir)

# Convolutional network: conv -> pool -> conv -> conv -> pool -> flatten ->
# dense(512) -> dropout -> softmax over the 10 digit classes.
model = Sequential()
model.add(Conv2D(filters=32, kernel_size=(3, 3), activation='relu',
                 input_shape=input_shape))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
model.add(MaxPooling2D(pool_size=(2, 2)))
model.add(Flatten())
model.add(Dense(units=512, activation='relu'))
model.add(Dropout(rate=0.5))
model.add(Dense(units=10, activation='softmax'))
model.summary()
model.compile(loss=keras.losses.categorical_crossentropy,
              optimizer=keras.optimizers.Adadelta(),
              metrics=['accuracy'])

# Train, validating on the test split after every epoch.
model.fit(X, Y,
          batch_size=batch_size,
          epochs=epochs,
          verbose=1,
          validation_data=(X_test, Y_test))

# Save architecture and weights in HDF5 format.
model.save(model_path)
模型加载
tflearn
'''
加载tflearn模型
'''
import tflearn
from tflearn.data_utils import shuffle, to_categorical
from tflearn.layers.core import input_data, dropout, fully_connected
from tflearn.layers.conv import conv_2d, max_pool_2d, global_avg_pool
from tflearn.layers.estimator import regression
from tflearn.layers.normalization import batch_normalization
from tflearn.data_preprocessing import ImagePreprocessing
from tflearn.data_augmentation import ImageAugmentation
import numpy as np
from load_input import load_test_data
import cv2 as cv
X_test, Y_test = load_test_data()
X_test, Y_test = shuffle(X_test, Y_test)
# NOTE(review): X_test / Y_test are not used again in this script; confirm
# whether an evaluation step was intended here.

# Real-time data preprocessing, declared the same way as at training time.
img_prep = ImagePreprocessing()
img_prep.add_featurewise_zero_center()
img_prep.add_featurewise_stdnorm()

# Rebuild the same layer stack as the training script before loading weights.
network = input_data(shape=[None, 32, 32, 3],
                     data_preprocessing=img_prep)
network = conv_2d(network, 32, 3, activation='relu')
network = max_pool_2d(network, 2)
network = conv_2d(network, 64, 3, activation='relu')
network = conv_2d(network, 64, 3, activation='relu')
network = max_pool_2d(network, 2)
network = fully_connected(network, 512, activation='relu')
network = dropout(network, 0.5)
network = fully_connected(network, 10, activation='softmax')
network = regression(network, optimizer='adam',
                     loss='categorical_crossentropy',
                     learning_rate=0.001)

# Wrap the graph in a DNN and restore the trained weights (no training here).
model = tflearn.DNN(network, tensorboard_verbose=0)
model.load("./models/model1/svhnCNN.tfl")

img = cv.imread('你的测试图片路径')
if img is None:
    # cv.imread signals an unreadable or missing file by returning None.
    raise IOError('cv.imread could not read the test image; check the path')
# cv.imread yields BGR channel order, whereas the SVHN arrays used for
# training are RGB; convert so the channels match what the network saw.
img = cv.cvtColor(img, cv.COLOR_BGR2RGB)
img = cv.resize(img, (32, 32))
img = img.reshape(1, 32, 32, 3)
img = img.astype('float32') / 255.0  # scale to [0, 1] as in training
results = model.predict(img)
print(results)
# Index of the highest class probability is the predicted digit.
result = results[0].argmax()
print(result)
keras
'''
加载keras模型
'''
import cv2
import numpy as np
from keras import models
import matplotlib.pyplot as plt
def transMNIST(path, size=(32, 32)):
    """Read an image file and return it as a one-image RGB batch.

    Args:
        path: path of the image file to read.
        size: ``(width, height)`` passed to ``cv2.resize``.

    Returns:
        Array of shape ``(1, height, width, 3)`` in RGB channel order, with
        the pixel values as returned by OpenCV (not yet scaled).

    Raises:
        IOError: if OpenCV cannot read ``path``.
    """
    img = cv2.imread(path)
    if img is None:
        # cv2.imread signals an unreadable or missing file by returning None.
        raise IOError('cv2.imread could not read image: {}'.format(path))
    # cv2.imread yields BGR channel order, whereas the SVHN arrays used for
    # training are RGB; convert so the channels match what the network saw.
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    img = cv2.resize(img, size)
    # Prepend the batch axis without hard-coding 32 x 32, so a non-default
    # ``size`` no longer breaks the reshape.
    return img.reshape((1,) + img.shape)
def predict(imgData, model_path='models/model2/svhnCNN.h5'):
    """Classify the first image of a batch with the saved Keras model.

    Args:
        imgData: image batch array; it is cast to float32 and divided by
            255 before being passed to the model.
        model_path: path of the saved ``.h5`` model. Defaults to the
            location the original code hard-coded.

    Returns:
        Index of the largest score in the model output for the first image.
        The value is also printed.
    """
    my_mnist_model = models.load_model(model_path)
    img = imgData.astype('float32') / 255.0  # same scaling as at training time
    results = my_mnist_model.predict(img)
    # Only the first image of the batch is reported.
    result = results[0].argmax()
    print(result)
    return result
# Placeholder: replace with the path of the image to classify.
path = '你的路径'
# Load the image as a one-image batch and classify it in one step.
results = predict(transMNIST(path))