如何训练一个模型分类手写字母?
环境:
TensorFlow: 2.7.0
Keras: 2.7.0
Python: 3.8.0
数据来源:https://github.com/azadis/MC-GAN
1. 制作数据集---图片切割
import cv2, time, glob
import os
# Cut every Capitals64 sheet into 26 letter tiles (A-Z), shrink each tile to
# 28x28, binarise it, and store it under <class_object>/<split>/<LETTER>/.
class_object = "alphabet"
dir_list = ["train", "test"]

# Sheet layout used by the slicing below: 26 tiles, each 64 px wide, laid out
# left to right in alphabetical order.
GLYPH_COUNT = 26
TILE_WIDTH = 64
OUTPUT_SIZE = (28, 28)

for dir_name in dir_list:
    sheet_pattern = os.path.join(
        class_object, "MC-GAN/datasets/Capitals64", dir_name, "*.png"
    )
    image_list = glob.glob(sheet_pattern)

    # NOTE(review): the original indentation was lost; the counter is assumed
    # to advance once per successfully read sheet -- confirm.
    count = 0
    for image_path in image_list:
        image = cv2.imread(image_path, 0)  # flag 0: read as grayscale
        if image is None:
            # Unreadable file: skip it, as the original did.
            continue

        # Name the tiles after the sheet being processed. The original used
        # image_list[0] here, so every tile carried the first sheet's name.
        image_name = os.path.basename(image_path)

        for i in range(GLYPH_COUNT):
            letter = chr(65 + i)
            img = image[:, i * TILE_WIDTH:(i + 1) * TILE_WIDTH]
            if img.size == 0:
                # Sheet is narrower than 26 tiles; nothing left to cut.
                break
            img = cv2.resize(img, OUTPUT_SIZE)
            # THRESH_BINARY_INV: pixels above 120 become 0, the rest 255.
            _, img = cv2.threshold(img, 120, 255, cv2.THRESH_BINARY_INV)

            alpa_dir = os.path.join(class_object, dir_name, letter)
            os.makedirs(alpa_dir, exist_ok=True)
            tile_name = letter + "_" + str(count) + "_" + image_name
            cv2.imwrite(os.path.join(alpa_dir, tile_name), img)
        count = count + 1
2. 训练集测试集转二进制文件
from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import cv2, time, glob
import os
# Pack the per-class PNG folders of each split into two NumPy files:
#   <class_object>/binary_data/<class_object>_x_<split>.npy  images, (N, 28, 28)
#   <class_object>/binary_data/<class_object>_y_<split>.npy  labels, (N,)
dir_list = ["train", "test"]
#class_object = "number"
#class_num = 10
class_object = "alphabet"
class_num = 26

# np.save does not create missing folders, so make sure the target exists.
output_dir = os.path.join(class_object, 'binary_data')
os.makedirs(output_dir, exist_ok=True)

for split_name in dir_list:
    images = []
    labels = []
    for num in range(class_num):
        # Class folders are named 'A'..'Z' for letters and '0'..'9' for digits.
        if class_object == "alphabet":
            class_dir_name = chr(num + 65)
        elif class_object == "number":
            class_dir_name = str(num)
        else:
            raise ValueError(f'unsupported class_object: {class_object!r}')

        pattern = os.path.join(class_object, split_name, class_dir_name, "*.png")
        # Sorted so the sample order is reproducible between runs.
        for image_path in sorted(glob.glob(pattern)):
            with Image.open(image_path) as handle:
                pixels = np.array(handle.convert(mode='L'))
            if pixels.shape != (28, 28):
                raise ValueError(
                    f'{image_path}: expected a 28x28 image, got {pixels.shape}'
                )
            images.append(pixels)
            # One label per image, so classes with different image counts
            # still get correct labels (the original assumed equal counts).
            labels.append(num)

    if not images:
        print(f'no images found for {class_object}{os.sep}{split_name}; skipped')
        continue

    # 8-bit grayscale pixels; the training step rescales them to float32.
    binary_samples = np.stack(images)
    classes = np.array(labels)
    print(f'X shape: {binary_samples.shape}')
    print(f'y shape: {classes.shape}')

    xfile = os.path.join(output_dir, class_object + '_x_' + split_name + '.npy')
    yfile = os.path.join(output_dir, class_object + '_y_' + split_name + '.npy')
    np.save(xfile, binary_samples)
    np.save(yfile, classes)
3. Train
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory
import os
import tensorflow_datasets as tfds
# Data-set selector. For the digit data set use instead:
#   object_class = 'number'
#   class_num = 10
object_class = 'alphabet'
class_num = 26

#######################################################################################
# Batch sizing used by get_mnist_data(): the global batch is the per-replica
# batch multiplied by the replica count reported by the strategy.
mirrored_strategy = tf.distribute.MirroredStrategy()
BUFFER_SIZE = 10000
BATCH_SIZE_PER_REPLICA = 64
BATCH_SIZE = mirrored_strategy.num_replicas_in_sync * BATCH_SIZE_PER_REPLICA
def get_mnist_data():
    """Load MNIST via tensorflow_datasets and return (train, eval) pipelines.

    Images are cast to float32 and divided by 255. The training pipeline is
    cached, shuffled with ``BUFFER_SIZE`` and batched with ``BATCH_SIZE``;
    the evaluation pipeline is only batched.
    """
    def _to_unit_float(image, label):
        # Cast first, then rescale; the label passes through untouched.
        return tf.cast(image, tf.float32) / 255, label

    splits, _info = tfds.load(name='mnist', with_info=True, as_supervised=True)

    train_dataset = (
        splits['train']
        .map(_to_unit_float)
        .cache()
        .shuffle(BUFFER_SIZE)
        .batch(BATCH_SIZE)
    )
    eval_dataset = splits['test'].map(_to_unit_float).batch(BATCH_SIZE)
    return train_dataset, eval_dataset
#######################################################################################
# Side lengths of the input images, in pixels.
img_width, img_height = 28, 28
EPOCHES = 25

# The .npy files written by the conversion step, one x/y pair per split.
data_dir = os.path.join(object_class, 'binary_data')
# test set
x_test_file = os.path.join(data_dir, object_class + '_x_test.npy')
y_test_file = os.path.join(data_dir, object_class + '_y_test.npy')
# train set
x_train_file = os.path.join(data_dir, object_class + '_x_train.npy')
y_train_file = os.path.join(data_dir, object_class + '_y_train.npy')

# load test data
x_test = np.load(x_test_file)
y_test = np.load(y_test_file)
# load train data
x_train = np.load(x_train_file)
y_train = np.load(y_train_file)

# Fail early if images and labels do not line up.
if len(x_train) != len(y_train) or len(x_test) != len(y_test):
    raise ValueError('image and label arrays have different lengths')

# The arrays are stored class by class. A 10000-element shuffle buffer only
# mixes neighbouring samples, so permute the whole training set once first.
train_order = np.random.permutation(len(x_train))
x_train = x_train[train_order]
y_train = y_train[train_order]

# Scale pixel values to the range 0..1; the training and the test set must be
# preprocessed in the same way.
x_train = x_train / 255.0
x_test = x_test / 255.0

# Add a channels dimension
x_train = x_train[..., tf.newaxis].astype("float32")
x_test = x_test[..., tf.newaxis].astype("float32")

train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train)).shuffle(10000).batch(64)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test)).shuffle(10000).batch(64)
# mnist_train, mnist_test = get_mnist_data()
# mnist_train_list = list(mnist_train.as_numpy_iterator())
# mnist_test_list = list(mnist_test.as_numpy_iterator())
# train_ds_list = list(train_ds.as_numpy_iterator())
# test_ds_list = list(test_ds.as_numpy_iterator())
# total_train_list = train_ds_list + mnist_train_list
# total_test_list = test_ds_list + mnist_test_list
# train_ds = tf.data.Dataset.from_tensor_slices(mnist_train_list).shuffle(10000).batch(64)
# test_ds = tf.data.Dataset.from_tensor_slices(mnist_test_list).shuffle(10000).batch(64)
###########################################################################
###########################################################################
# Network definition and compilation.
#
# Conv2D:       32 filters with a 3x3 kernel and ReLU, fed 28x28x1 images.
# MaxPooling2D: pooling layer with its default settings.
# Flatten:      turns the feature maps into a one-dimensional vector; it only
#               reformats the data.
# Dense(64):    fully connected layer with 64 units and ReLU.
# Dense(class_num): output layer with one raw score (logit) per class. No
#               softmax is applied here, so the loss uses from_logits=True.
model = tf.keras.Sequential()
model.add(tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(28, 28, 1)))
model.add(tf.keras.layers.MaxPooling2D())
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(64, activation='relu'))
model.add(tf.keras.layers.Dense(class_num))

# Labels are plain integer class indices, hence the "sparse" loss and metric.
sparse_ce_loss = tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True)
adam_optimizer = tf.keras.optimizers.Adam()
accuracy_metric = tf.metrics.SparseCategoricalAccuracy()
model.compile(
    loss=sparse_ce_loss,
    optimizer=adam_optimizer,
    metrics=[accuracy_metric],
)
#################################################################################
# Fit on the training split (the original fitted on test_ds) and report loss
# and accuracy on the held-out test split after every epoch.
model.fit(train_ds, validation_data=test_ds, epochs=EPOCHES)

# Save the trained model under <object_class>/model_20211227.
keras_model_path = os.path.join(object_class, "model_20211227")
os.makedirs(keras_model_path, exist_ok=True)
model.save(keras_model_path)

# Overfitting happens when a machine learning model performs worse on new,
# previously unseen inputs than it does on the training data.
# An overfitted model "memorizes" the noise and details in the training
# dataset to a point where it negatively impacts the performance of the model on the new data.
# Compare the training and validation accuracy printed by fit() to spot it.
4. 预测
# This Python file uses the following encoding: utf-8
from matplotlib import pyplot as plt
import numpy as np
import cv2, time, glob
import tensorflow as tf
import os
# Data-set selector and the expected label of the images to classify.
# For digits use instead:
#   class_object = "number"
#   ch = '3'
class_object = "alphabet"
ch = 'I'

# model dir
# The training step saves to "model_20211227"; the original text here read
# "model_20121217", which that step never creates.
# NOTE(review): confirm no separately trained model of that name was meant.
keras_model_path = class_object + os.sep + "model_20211227"

# load a model
restored_keras_model = tf.keras.models.load_model(keras_model_path)

# image files of the selected class
image_glob = class_object + os.sep + "train" + os.sep + ch + os.sep + "*.png"
image_list = glob.glob(image_glob)
def predict_one(image_path):
    """Classify one image file and update the global hit counter.

    The image is read as grayscale, resized to 28x28, scaled to 0..1 and
    passed to ``restored_keras_model``. The predicted class index is mapped
    to a label ('A'..'Z' for "alphabet", '0'..'9' for "number"); when that
    label equals the global ``ch``, the global ``count`` is incremented.

    Args:
        image_path: path of the image file to classify.

    Returns:
        Tuple ``(class_index, seconds)``: the arg-max of the model output and
        the wall time spent inside ``predict``.

    Raises:
        ValueError: if the image cannot be read.
    """
    global count

    image = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    if image is None:
        # Raise a clear error here instead of failing inside cv2.resize.
        raise ValueError(f"cannot read image: {image_path}")

    # Same preprocessing as in training: 28x28, float32, values in 0..1.
    image = cv2.resize(image, (28, 28)).astype('float32') / 255

    # perf_counter is monotonic, so the interval cannot be distorted by
    # system clock adjustments.
    start_time = time.perf_counter()
    pred = restored_keras_model.predict(image.reshape(1, 28, 28, 1), batch_size=1)
    elapsed = time.perf_counter() - start_time

    predicted_index = pred.argmax()
    if class_object == "alphabet":
        predicted_label = chr(predicted_index + 65)
    elif class_object == "number":
        predicted_label = str(predicted_index)
    else:
        # Unknown data set: never counts as a hit, as in the original.
        predicted_label = None

    if predicted_label == ch:
        count = count + 1
    return predicted_index, elapsed
def predict_all():
    """Classify every file in the global ``image_list`` and print the results.

    For each image this prints the arg-max class index of the model output
    and the time spent in the ``predict`` call.
    """
    for path in image_list:
        # Read as grayscale and bring to the 28x28 float32 input format.
        gray = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
        pixels = cv2.resize(gray, (28, 28)).astype('float32').reshape((28, 28))
        # Scale to 0..1.
        pixels /= 255

        t_begin = time.time()
        pred = restored_keras_model.predict(pixels.reshape(1, 28, 28, 1), batch_size=1)
        t_end = time.time()

        print("predict: ", pred.argmax())
        print("time: ", t_end - t_begin)
        # To inspect an image: plt.imshow(pixels, cmap='gray'); plt.show()
# Per-class evaluation: run predict_one() on every image of each class folder
# and print the share of images whose prediction matches the folder's label.
# predict_one() reads the globals `ch` and `count`, so both are set here at
# module level for each class.
if class_object == "number":
    class_labels = [str(n) for n in range(10)]
else:
    class_labels = [chr(n + 65) for n in range(26)]

for ch in class_labels:
    image_list = glob.glob(os.path.join(class_object, "train", ch, "*.png"))
    count = 0
    for image_path in image_list:
        predict_one(image_path)

    print(ch)
    if image_list:
        # Share of this class's images that were labelled correctly.
        print("precision: ", count / len(image_list))
    else:
        # An empty class folder would otherwise divide by zero.
        print("precision: ", "n/a (no images found)")