Tensorflow Keras:手写大写字母识别

如何训练一个模型分类手写字母?

环境:
Tensorflow:2.7.0
Keras:2.7.0
python: 3.8.0

数据来源:https://github.com/azadis/MC-GAN

1. 制作数据集---图片切割

import cv2, time, glob
import os
# Slice every strip image of the MC-GAN "Capitals64" dataset into 26 glyph
# tiles and store each tile as a 28x28 binarised PNG under
# <class_object>/<split>/<LETTER>/.
class_object = "alphabet"
dir_list = ["train", "test"]
for dir_name in dir_list:
    strip_pattern = os.path.join(
        class_object, "MC-GAN/datasets/Capitals64", dir_name, "*.png"
    )
    image_list = glob.glob(strip_pattern)
    # Running index of successfully read strips; part of the output file name.
    count = 0
    for image_path in image_list:
        # Flag 0 makes OpenCV load the strip as a single-channel grayscale image.
        image = cv2.imread(image_path, 0)
        if image is None:
            # Unreadable file: skip it without consuming an index.
            continue
        # Name the tiles after the strip they were cut from.  (The previous
        # code always used the name of image_list[0] here.)
        image_name = os.path.basename(image_path)
        for i in range(26):
            letter = chr(65 + i)
            # assumes each strip holds A..Z left to right in 64-pixel-wide
            # cells -- TODO confirm against the dataset layout
            img = image[:, i * 64: i * 64 + 64]
            img = cv2.resize(img, (28, 28))
            # Inverted binary threshold: pixels above 120 become 0, the rest 255.
            _, img = cv2.threshold(img, 120, 255, cv2.THRESH_BINARY_INV)
            letter_dir = os.path.join(class_object, dir_name, letter)
            os.makedirs(letter_dir, exist_ok=True)
            tile_name = letter + "_" + str(count) + "_" + image_name
            cv2.imwrite(os.path.join(letter_dir, tile_name), img)
        count = count + 1


2.  训练集测试集转二进制文件


from PIL import Image
import matplotlib.pyplot as plt
import numpy as np
import cv2, time, glob
import os

# Pack the per-class PNG folders of each split into two NumPy files:
#   <class_object>/binary_data/<class_object>_x_<split>.npy  -> images, (N, 28, 28)
#   <class_object>/binary_data/<class_object>_y_<split>.npy  -> integer labels, (N,)
dir_list = ["train", "test"]

#class_object = "number"
#class_num = 10
class_object = "alphabet"
class_num = 26

# np.save does not create missing directories, so make sure the target exists.
output_dir = os.path.join(class_object, 'binary_data')
os.makedirs(output_dir, exist_ok=True)

for split_name in dir_list:
    images = []
    labels = []
    for num in range(class_num):
        # Folder name of the class: 'A'..'Z' for letters, '0'..'9' for digits.
        if class_object == "alphabet":
            tmp_dir = chr(num + 65)
        elif class_object == "number":
            tmp_dir = str(num)
        else:
            raise ValueError(f'unsupported class_object: {class_object!r}')
        class_dir = os.path.join(class_object, split_name, tmp_dir)
        # Sorted so the sample order is reproducible between runs.
        for image_path in sorted(glob.glob(os.path.join(class_dir, "*.png"))):
            # The context manager closes the file even if decoding fails.
            with Image.open(image_path) as handle:
                pixels = np.asarray(handle.convert(mode='L'), dtype=np.uint8)
            images.append(pixels)
            # One label per image, so classes may hold different numbers of
            # samples without the labels drifting out of alignment.
            labels.append(num)

    if not images:
        print(f'No images found for split {split_name!r}; nothing saved')
        continue

    # 8-bit grayscale values (0-255) are stored as uint8.
    binary_samples = np.array(images, dtype=np.uint8).reshape(-1, 28, 28)
    classes = np.array(labels)

    print(f'X shape: {binary_samples.shape}')
    print(f'y shape: {classes.shape}')

    xfile = os.path.join(output_dir, class_object + '_x_' + split_name + '.npy')
    yfile = os.path.join(output_dir, class_object + '_y_' + split_name + '.npy')
    np.save(xfile, binary_samples)
    np.save(yfile, classes)


3. Train


from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
from keras import backend as K
import numpy as np
import tensorflow as tf
from tensorflow.keras.preprocessing import image_dataset_from_directory
import os
import tensorflow_datasets as tfds

#object_class = 'number'
#class_num = 10

# Active task: name of the data folder and number of output classes.
object_class, class_num = 'alphabet', 26


#######################################################################################
# Distribution strategy plus the batching constants read by get_mnist_data().
# The global batch size is the per-replica size times the replica count.
mirrored_strategy = tf.distribute.MirroredStrategy()
BUFFER_SIZE = 10000
BATCH_SIZE_PER_REPLICA = 64
BATCH_SIZE = mirrored_strategy.num_replicas_in_sync * BATCH_SIZE_PER_REPLICA

def get_mnist_data():
    """Load MNIST via tensorflow_datasets and return batched pipelines.

    Returns:
        A ``(train_dataset, eval_dataset)`` tuple. Both map images to
        float32 divided by 255 and are batched with ``BATCH_SIZE``; the
        training pipeline is additionally cached and shuffled with
        ``BUFFER_SIZE``.
    """
    def _to_unit_float(image, label):
        # Cast to float32, then divide by 255; the label passes through.
        return tf.cast(image, tf.float32) / 255, label

    splits, _info = tfds.load(name='mnist', with_info=True, as_supervised=True)

    train_pipeline = splits['train'].map(_to_unit_float)
    train_pipeline = train_pipeline.cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

    eval_pipeline = splits['test'].map(_to_unit_float).batch(BATCH_SIZE)
    return train_pipeline, eval_pipeline
#######################################################################################
# Image geometry in pixels and the number of training epochs.
img_width, img_height = 28, 28
EPOCHES = 25


def _binary_path(suffix):
    """Return the path of a saved array inside <object_class>/binary_data/."""
    return os.sep.join([object_class, 'binary_data', object_class + suffix])


# Locations of the saved arrays (test set first, then train set).
x_test_file = _binary_path('_x_test.npy')
y_test_file = _binary_path('_y_test.npy')
x_train_file = _binary_path('_x_train.npy')
y_train_file = _binary_path('_y_train.npy')

# Read the arrays in the same order: test data, then train data.
x_test = np.load(x_test_file)
y_test = np.load(y_test_file)
x_train = np.load(x_train_file)
y_train = np.load(y_train_file)

# Both splits get identical preprocessing: divide by 255, append a trailing
# channels axis, and convert to float32.
x_train = (x_train / 255.0)[..., tf.newaxis].astype("float32")
x_test = (x_test / 255.0)[..., tf.newaxis].astype("float32")

a = (x_train, y_train)

# Shuffled (buffer of 10000) pipelines delivering batches of 64 samples.
train_ds = tf.data.Dataset.from_tensor_slices((x_train, y_train))
train_ds = train_ds.shuffle(10000).batch(64)
test_ds = tf.data.Dataset.from_tensor_slices((x_test, y_test))
test_ds = test_ds.shuffle(10000).batch(64)


# mnist_train, mnist_test = get_mnist_data()
# mnist_train_list = list(mnist_train.as_numpy_iterator())
# mnist_test_list = list(mnist_test.as_numpy_iterator())


# train_ds_list = list(train_ds.as_numpy_iterator())
# test_ds_list = list(test_ds.as_numpy_iterator())

# total_train_list = train_ds_list + mnist_train_list
# total_test_list = test_ds_list + mnist_test_list

# train_ds = tf.data.Dataset.from_tensor_slices(mnist_train_list).shuffle(10000).batch(64)
# test_ds = tf.data.Dataset.from_tensor_slices(mnist_test_list).shuffle(10000).batch(64)

###########################################################################
# Build, compile, train and save the classifier.
#
# Layers, in order:
#   Conv2D(32, 3)    - 32 convolution filters with a 3x3 kernel and ReLU, applied
#                      to the 28x28x1 input.
#   MaxPooling2D()   - spatial down-sampling with the layer's default pool size.
#   Flatten()        - reshapes the feature maps into one vector; it has no
#                      parameters to learn.
#   Dense(64)        - fully connected hidden layer with ReLU.
#   Dense(class_num) - one raw score (logit) per class; no softmax is applied,
#                      which is why the loss is created with from_logits=True.
model = tf.keras.Sequential([
        tf.keras.layers.Conv2D(32, 3, activation='relu', input_shape=(28,28,1)),
        tf.keras.layers.MaxPooling2D(),
        tf.keras.layers.Flatten(),
        tf.keras.layers.Dense(64, activation='relu'),
        tf.keras.layers.Dense(class_num)
    ])
# Sparse loss/metric: labels are integer class indices, not one-hot vectors.
model.compile(loss=tf.keras.losses.SparseCategoricalCrossentropy(from_logits=True),
                optimizer=tf.keras.optimizers.Adam(),
                metrics=[tf.metrics.SparseCategoricalAccuracy()])
#################################################################################
# Fit on the training split and report loss/accuracy on the held-out test
# split after every epoch.  (Previously the model was fitted on test_ds and
# train_ds was never used.)
model.fit(train_ds, validation_data=test_ds, epochs=EPOCHES)

# Save the trained model into <object_class>/model_20211227.
keras_model_path = object_class + os.sep + "model_20211227"
os.makedirs(keras_model_path, exist_ok=True)
model.save(keras_model_path)


# Overfitting happens when a machine learning model performs worse on new, 
# previously unseen inputs than it does on the training data. 
# An overfitted model "memorizes" the noise and details in the training 
# dataset to a point where it negatively impacts the performance of the model on the new data. 


4. 预测


# This Python file uses the following encoding: utf-8
from matplotlib import pyplot as plt
import numpy as np
import cv2, time, glob
import tensorflow as tf
import os

# Prediction setup: choose the task, load the trained model and collect the
# default list of images to classify.
# class_object = "number"
# ch = '3'

class_object = "alphabet"
# Expected label of the images in image_list.
ch = 'I'

# Must be the directory the training script saved the model to
# ("model_20211227"); the previous value "model_20121217" did not match it.
keras_model_path = class_object + os.sep + "model_20211227"
# load a model
restored_keras_model = tf.keras.models.load_model(keras_model_path)

# Default image set: every PNG of class `ch` in the train folder.  predict_all()
# reads this module-level list.
image_list = glob.glob(os.path.join(class_object, "train", ch, "*.png"))


def predict_one(image_path):
    """Classify one image file and update the global hit counter.

    The image is read as grayscale, resized to 28x28, converted to float32
    and divided by 255 before being passed to ``restored_keras_model``.
    The module-level ``count`` is incremented when the predicted label
    equals the module-level ``ch``.

    Args:
        image_path: Path of the image file to classify.

    Returns:
        A tuple ``(class_index, seconds)``: the argmax of the model output
        and the wall-clock duration of the ``predict`` call.
    """
    global count

    gray = cv2.imread(image_path, cv2.IMREAD_GRAYSCALE)
    gray = cv2.resize(gray, (28, 28))
    pixels = gray.astype('float32').reshape((28, 28))
    pixels /= 255

    # Time only the model call (including the reshape to a batch of one).
    started = time.time()
    pred = restored_keras_model.predict(pixels.reshape(1, 28, 28, 1), batch_size=1)
    finished = time.time()

    # Turn the class index into the label format used by `ch`.
    if class_object == "alphabet":
        is_hit = chr(pred.argmax() + 65) == ch
    elif class_object == "number":
        is_hit = str(pred.argmax()) == ch
    else:
        is_hit = False

    if is_hit:
        count = count + 1

    return pred.argmax(), finished - started

def predict_all():
    """Classify every file in the module-level ``image_list``.

    Each image gets the same preprocessing as in ``predict_one`` (grayscale,
    28x28, float32, divided by 255). The predicted class index and the
    duration of the ``predict`` call are printed per image; nothing is
    returned.
    """
    for path in image_list:
        gray = cv2.resize(cv2.imread(path, cv2.IMREAD_GRAYSCALE), (28, 28))
        pixels = gray.astype('float32').reshape((28, 28))
        pixels /= 255

        # Time only the model call (including the reshape to a batch of one).
        tic = time.time()
        pred = restored_keras_model.predict(pixels.reshape(1, 28, 28, 1), batch_size=1)
        toc = time.time()

        print("predict: ",pred.argmax())
        print("time: ",toc - tic)


# Per-letter evaluation: run predict_one() on every image of each class folder
# and print the fraction that was classified as that letter.
# NOTE(review): the images come from the "train" folder; point this at "test"
# for a held-out score if the model was fitted on the train split.
for num in range(0, 26):
    # predict_one() compares its prediction against the module-level `ch`.
    ch = chr(num + 65)
    image_list = glob.glob(os.path.join(class_object, "train", ch, "*.png"))
    # predict_one() increments this module-level counter on every hit.
    count = 0
    for image_path in image_list:
        predict_one(image_path)
    print(ch)
    if image_list:
        print("precision: ",count/len(image_list))
    else:
        # An empty class folder would otherwise raise ZeroDivisionError.
        print("precision: n/a (no images found)")
  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

0010000100

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值