本文主要实现使用keras对字符进行分类。
项目目录结构:
一、生成数据。
注:生成数据的代码只能在ubuntu操作系统中运行。
generate_image.py:
import os
import random
import numpy as np
import uuid
# Directory of light background images; the trailing slash is kept so plain
# string concatenation in get_files_from_dir yields valid paths.
PATH_TO_LIGHT_BACKGROUNDS = 'light_backgrounds/'
# Directory of dark background images.
PATH_TO_DARK_BACKGROUNDS = 'dark_backgrounds/'
# Directory containing the font files used to render characters.
PATH_TO_FONT_FILES = 'fonts/'
# Where per-character class folders are written.
# NOTE(review): 'ouput' looks like a typo for 'output' — kept as-is because the
# surrounding text instructs the reader to rename it to 'train/' / 'test/'.
OUTPUT_DIR = 'ouput/'
# Number of synthetic images generated per character class.
NUM_IMAGES_PER_CLASS = 10
# Get all files from directory
def get_files_from_dir(dirname):
    """Return the paths of every entry directly inside *dirname*.

    Uses os.path.join instead of the original plain concatenation, so the
    function also works when *dirname* lacks a trailing slash (with a
    trailing slash the resulting paths are identical to before).
    """
    return [os.path.join(dirname, name) for name in os.listdir(dirname)]
# Random perspective distortion created by randomly moving the four corners
# of the image.
def get_distort_arg():
    """Build the quoted ImageMagick control-point string for a small random
    perspective warp: each corner of a 100x100 square is displaced by up to
    ``amount`` pixels toward the inside of the image."""
    amount = 5
    def near_zero():
        return np.random.randint(0, amount)
    def near_hundred():
        return np.random.randint(100 - amount, 100)
    # (source corner, displaced destination) pairs — evaluation order matches
    # the original left-to-right sequence of randint calls.
    corners = [
        ('0,0', (near_zero(), near_zero())),
        ('100,0', (near_hundred(), near_zero())),
        ('0,100', (near_zero(), near_hundred())),
        ('100,100', (near_hundred(), near_hundred())),
    ]
    body = ' '.join('%s %d,%d' % (src, dx, dy) for src, (dx, dy) in corners)
    return "'" + body + "'"
# Randomly extracts 32x32 regions of an image and saves it to outdir
def create_random_crops(image_filename, num_crops, out_dir):
    """Cut *num_crops* random 32x32 patches out of *image_filename*.

    Shells out to ImageMagick's ``convert``, so it only works where
    ImageMagick is installed (the author notes Ubuntu).  Each crop is saved
    in *out_dir* under a fresh uuid4-based name.

    NOTE(review): the file name is interpolated into a shell command
    unquoted — paths containing spaces or shell metacharacters will break.
    Acceptable here because all names are generated by this script.
    """
    # Ask ImageMagick for the image dimensions ("-ping" avoids decoding the
    # full pixel data).
    dim = os.popen('convert ' + image_filename + ' -ping -format "%w %h" info:').read()
    dim = dim.split()
    im_width = int(dim[0])
    im_height = int(dim[1])
    # Guard: an image smaller than the crop size would give randint an empty
    # range and raise (the original code crashed here).
    if im_width < 32 or im_height < 32:
        return
    for i in range(0, num_crops):
        # Randomly select the top-left corner of the 32x32 crop window.
        x = random.randint(0, im_width - 32)
        y = random.randint(0, im_height - 32)
        outfile = uuid.uuid4().hex + '.jpg'
        command = "convert " + image_filename + " -crop 32x32" + "+" + str(x) + "+" + str(y) + " " + os.path.join(out_dir, outfile)
        os.system(str(command))
# Generate crops for all files in file_list and store them in dirname
def generate_crops(file_list, dirname):
    """Ensure *dirname* exists, then take 10 random 32x32 crops from every
    file in *file_list*."""
    if not os.path.isdir(dirname):
        os.mkdir(dirname)
    for filename in file_list:
        create_random_crops(filename, 10, dirname)
# Class labels: the 26 upper-case letters followed by the 10 digits.
char_list = [chr(code) for code in range(ord('A'), ord('A') + 26)]
char_list += [chr(code) for code in range(ord('0'), ord('0') + 10)]
# Font colors that stand out against dark backgrounds.
color_light = ['white', 'lime', 'gray', 'yellow', 'silver', 'aqua']
# Font colors that stand out against light backgrounds.
color_dark = ['black', 'green', 'maroon', 'blue', 'purple', 'red']
# Backgrounds and fonts available on disk.
light_backgrounds = get_files_from_dir(PATH_TO_LIGHT_BACKGROUNDS)
dark_backgrounds = get_files_from_dir(PATH_TO_DARK_BACKGROUNDS)
list_files_fontt = get_files_from_dir(PATH_TO_FONT_FILES)
# Crop every background into random 32x32 patches once, up front.
light_backgrounds_crops_dir = 'light_backgrounds_crops/'
dark_backgrounds_crops_dir = 'dark_backgrounds_crops/'
generate_crops(light_backgrounds, light_backgrounds_crops_dir)
generate_crops(dark_backgrounds, dark_backgrounds_crops_dir)
# From here on "backgrounds" means the 32x32 crops, not the source images.
light_backgrounds = get_files_from_dir(light_backgrounds_crops_dir)
dark_backgrounds = get_files_from_dir(dark_backgrounds_crops_dir)
# Index 0 = dark crops, index 1 = light crops — the font-color choice in the
# generation loop relies on this ordering.
all_backgrounds = [dark_backgrounds, light_backgrounds]
# Sample Command----- magick convert image.jpg -fill Black -font Courier-Oblique -weight 50 -pointsize 12 -gravity center -blur 0x8 -evaluate Gaussian-noise 1.2 -annotate 0+0 "Some text" output_image
for i, char in enumerate(char_list):
    # One output folder per character class.
    char_output_dir = OUTPUT_DIR + str(char) + "/"
    if not os.path.exists(char_output_dir):
        os.makedirs(char_output_dir)
    print("Generating data " + char_output_dir)
    # Generate the synthetic images for this class.
    for j in range(0, NUM_IMAGES_PER_CLASS):
        # Pick a background set (dark or light), then a crop from it.
        path = random.choice(all_backgrounds)
        bg_file = random.choice(path)
        # Pick a font file.
        font_file = random.choice(list_files_fontt)
        # Random perspective distortion, blur and noise amounts.
        distort_arg = get_distort_arg()
        blur = random.randint(0, 3)
        noise = random.randint(0, 5)
        # Small random shift of the glyph away from dead center.
        x = str(random.randint(-3, 3))
        y = str(random.randint(-3, 3))
        # Light font colors on dark backgrounds and vice-versa
        # (all_backgrounds[0] holds the dark crops).
        if path == all_backgrounds[0]:
            color = random.choice(color_light)
        else:
            color = random.choice(color_dark)
        # Assemble the ImageMagick command; the pieces reproduce the original
        # command string byte-for-byte (including the double space before
        # -annotate).
        command = ''.join([
            "convert ", str(bg_file),
            " -fill ", str(color),
            " -font ", str(font_file),
            " -weight 200 -pointsize 24 -distort Perspective ", str(distort_arg),
            " -gravity center -blur 0x", str(blur),
            " -evaluate Gaussian-noise ", str(noise),
            "  -annotate +", x, "+", y,
            " ", str(char),
            " ", char_output_dir, "output_file", str(i), str(j), ".jpg",
        ])
        # Uncomment line below to see what command is executed.
        # print(command)
        os.system(str(command))
生成训练数据时,将OUTPUT_DIR = 'ouput/'改为OUTPUT_DIR = 'train/',将NUM_IMAGES_PER_CLASS = 10改为NUM_IMAGES_PER_CLASS = 800。
生成测试数据时,将OUTPUT_DIR = 'ouput/'改为OUTPUT_DIR = 'test/',将NUM_IMAGES_PER_CLASS = 10改为NUM_IMAGES_PER_CLASS = 200。
然后就会生成A-Z,0-9的数据(0如下所示)。
二、运行模型对数据进行训练。
net.py:
# import required modules
from keras.models import Sequential
from keras.layers import Convolution2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense
import matplotlib.pyplot as plt
class Net:
    """Small LeNet-style CNN for classifying 32x32 character crops."""

    @staticmethod
    def build(width, height, depth, weightsPath=None, classes=36):
        '''
        Modified LeNet structure.

        Input:
            width, height, depth: input image shape (channels-last)
            weightsPath: optional .h5 file of pre-trained weights to load
            classes: number of output classes (default 36 = 26 letters + 10 digits;
                     made a parameter so the same architecture can be reused)
        Returns:
            the (optionally weight-loaded) Sequential model
        '''
        # initialize the model
        model = Sequential()
        # first layer CONV => RELU => POOL
        model.add(Convolution2D(32, (3, 3), input_shape=(width, height, depth)))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        # second layer CONV => RELU => POOL
        model.add(Convolution2D(32, (3, 3)))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        # third layer CONV => RELU => POOL
        model.add(Convolution2D(64, (3, 3)))
        model.add(Activation('relu'))
        model.add(MaxPooling2D(pool_size=(2, 2)))
        # flatten, then a 128-neuron fully-connected layer with dropout
        model.add(Flatten())
        model.add(Dense(128))
        model.add(Activation('relu'))
        model.add(Dropout(0.5))
        # softmax over the output classes
        model.add(Dense(classes))
        model.add(Activation('softmax'))
        # if weightsPath is specified load the weights
        if weightsPath is not None:
            print('weights loaded')
            model.load_weights(weightsPath)
        # return model
        return model
train_model.py:
# import required modules
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
import matplotlib.pyplot as plt
# import created model
from net import Net
# Input geometry: 32x32 RGB crops, matching the generator's output.
img_width, img_height = 32, 32
no_of_channels = 3
# Directory layout produced by generate_image.py.
train_data_dir = 'train/'
validation_data_dir = 'test/'
# Training schedule.
epochs = 80
batch_size = 32
# Build the network.
model = Net.build(width=img_width, height=img_height, depth=no_of_channels)
print('building done')
# Compile with RMSprop and categorical cross-entropy (36-way softmax).
rms = optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=None, decay=0.0)
print('optimizing done')
model.compile(
    loss='categorical_crossentropy',
    optimizer=rms,
    metrics=['accuracy'])
print('compiling')
# Augmentation configuration used for training.
# horizontal_flip = False, as mirrored characters are different glyphs.
# NOTE(review): the original also set featurewise_center and
# featurewise_std_normalization, but datagen.fit() was never called, so Keras
# only emitted a warning and skipped them — they are removed here to make the
# effective behavior explicit.
train_datagen = ImageDataGenerator(
    rescale=1. / 255,
    shear_range=0.1,
    zoom_range=0.1,
    rotation_range=5,
    width_shift_range=0.05,
    height_shift_range=0.05,
    horizontal_flip=False)
# Validation/test data is only rescaled.
test_datagen = ImageDataGenerator(rescale=1. / 255)
train_generator = train_datagen.flow_from_directory(
    train_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical')
validation_generator = test_datagen.flow_from_directory(
    validation_data_dir,
    target_size=(img_width, img_height),
    batch_size=batch_size,
    class_mode='categorical')
# fit the model
# Step counts must be integers; the original passed a float
# (samples / batch_size), which newer Keras versions reject.  Ceiling
# division ensures every sample is seen each epoch.
steps_train = (train_generator.samples + batch_size - 1) // batch_size
steps_val = (validation_generator.samples + batch_size - 1) // batch_size
history = model.fit_generator(
    train_generator,
    steps_per_epoch=steps_train,
    epochs=epochs,
    validation_data=validation_generator,
    validation_steps=steps_val)
# evaluate on validation dataset
model.evaluate_generator(validation_generator)
# save weights in a file
model.save_weights('trained_weights.h5')
print(history.history)
# Loss Curves
plt.figure(figsize=[8, 6])
plt.plot(history.history['loss'], 'r', linewidth=3.0)
plt.plot(history.history['val_loss'], 'b', linewidth=3.0)
plt.legend(['Training loss', 'Validation Loss'], fontsize=18)
plt.xlabel('Epochs ', fontsize=16)
plt.ylabel('Loss', fontsize=16)
plt.title('Loss Curves', fontsize=16)
# Accuracy Curves
# The history key is 'acc' in older Keras and 'accuracy' in tf.keras >= 2.0;
# look up whichever is present so the plot works on either version.
acc_key = 'acc' if 'acc' in history.history else 'accuracy'
plt.figure(figsize=[8, 6])
plt.plot(history.history[acc_key], 'r', linewidth=3.0)
plt.plot(history.history['val_' + acc_key], 'b', linewidth=3.0)
plt.legend(['Training Accuracy', 'Validation Accuracy'], fontsize=18)
plt.xlabel('Epochs ', fontsize=16)
plt.ylabel('Accuracy', fontsize=16)
plt.title('Accuracy Curves', fontsize=16)
plt.show()
训练结果:
三、对模型训练结果进行测试。
predict.py:
import cv2 # for reading and writing or showing image
import numpy as np
import matplotlib.pyplot as plt
from keras.models import load_model
import keras
from keras.preprocessing import image
from keras.models import load_model
from net import Net
import sys
def load_image(img_path, show=False):
    '''
    Function: Convert image to tensor
    Input: image_path (eg. /home/user/filename.jpg)
           (Note prefer having absolute path)
           show (default = False), set if you want to visualize the image
    Return: tensor format of image, shape (1, 32, 32, 3), scaled to [0, 1]
    '''
    # Load (and resize to 32x32 if needed) via the keras image module.
    loaded = image.load_img(img_path, target_size=(32, 32))  # Path of test image
    # Optionally display the image.
    if show:
        plt.imshow(loaded)
        plt.axis('off')
    # Convert to a (height, width, channels) array, add the batch axis, and
    # rescale to match the training preprocessing.
    tensor = np.expand_dims(image.img_to_array(loaded), axis=0)
    tensor /= 255.
    return tensor
def predict(weights_path, image_path):
    '''
    Function: loads a trained model and predicts the class of given image
    Input: weights_path (.h5 file, prefer adding absolute path)
           image_path (image to predict, prefer adding absolute path)
    Returns: none (prints the predicted class and its score)
    '''
    model = Net.build(32, 32, 3, weights_path)
    # Load image, rescaled to [0, 1] — local name chosen so the keras
    # `image` module is no longer shadowed (it was in the original).
    img_tensor = load_image(image_path, show=True)
    class_ = model.predict(img_tensor)  # predict the output, returns 36 length array
    print("Detected: ", class_[0])  # print what is predicted
    # Index of the highest score — replaces the original hand-rolled argmax
    # loop (which also shadowed the builtin `max`).
    output_indice = int(np.argmax(class_[0]))
    best_score = class_[0][output_indice]
    # The 26 characters A to Z; flow_from_directory sorts class folders, so
    # indices 0-9 are the digits '0'-'9' and 10-35 are 'A'-'Z'.
    characters = [chr(code) for code in range(65, 65 + 26)]
    if output_indice > 9:
        # It's a letter: map index 10 -> 'A', ..., 35 -> 'Z'.
        final_result = characters[output_indice - 10]
        print("Predicted: ", final_result)
        print("value: ", best_score)  # print predicted score
    else:
        # It's a digit: the class index equals the digit itself.
        print("Predicted: ", output_indice)
        print("value: ", best_score)  # print predicted score
# Script entry point: the first CLI argument is the path of the image to
# classify; bail out with a usage hint when it is missing.
if len(sys.argv) < 2:
    print("Enter test image path as an argument")
    sys.exit(0)
test_image = sys.argv[1]
# Specify weights file and test image.
predict("trained_weights.h5", test_image)
预测结果:
最后,如果需要本项目完整文件的可以关注一下我的Github:https://github.com/cchangcs/CharClassification。