- 图片预处理
原数据集是从A到Z以及del,nothing,space在内的29类手势的图片,每一类的图片是1500张,每张的图片大小是200*200,彩色图片。这里我们运用的是keras神经网络库,所以对原先图片进行以下的处理:
1. 将图片进行贴标签。原数据集是A到Z,del,nothing,space一共29类,对应着0到28这29个数字。如图1所示,在对应类别图片的名字前面加上“数字_”。
图 1:rename函数
2. 将每类1500张图片分成训练,验证,测试(12:2:1,即1200/200/100张)三个数据集。训练集和验证集提供给keras框架,让其进行训练,验证。待到模型完全建立好之后,利用keras库里的predict_classes函数进行预测图片的类别。(切记需要用到random库中的shuffle进行随机打乱,这样才能保证各类图片都有机会被抽入训练集,避免某些特殊图片始终无法被训练到)
图 2:分数据集函数
图 3:从A到Z循环的搭建
3. 将图片进行缩小以及颜色的转换。“缩小”是为了减小文件的体积(这里运用的是Bilinear线性剪裁像素点)加快图片的读入。“颜色转为灰色”是为了增强鲁棒性(图片的特征)。
图 4:图片的缩小以及灰度转换
- 数据的导入
首先导入几个模块。如图5,从数据集中读入图片,并将图片转化成高维数组,再将图片数组进行格式的修改。图片的“大小”与“通道”填写进np.reshape()
图 5:读取图片返回高维数组
如图6,这里需要说的是这一段程序:int(filename.split('_')[0])。这段程序便是把第1步中加在文件名前面的"数字"提取出来:用split函数按下划线把文件名分成两个部分,用切片[0]读取第一部分,也就是"标签",再将其转化为int类型。与图片预处理中的步骤"1"对应。
图 6:读取数据
(程序里的test其实对应的是我们放进去的验证集)
图 7:keras模型
图 8 :相关参数以及模型的保存
用时间127s, 精确度达到96.81%。
图 9 :识别率
画出的accuracy和loss的折线图如下:
图 10 :利用matplotlib画图
图 11 :精确度
图 12 :损失函数
- 预测图片
用原先分好的测试集作为预测图片。正向进行模型读取,权重读取,分类即可。对主程序进行解释。(如图13)
- max(model.predict(x_test)[0]) 从softmax激活函数分成的(29,)数组中取出可信度最高的一个。
- model.predict_classes(x_test) 判断所在的类别。
- random.sample(list,number) 从列表中随机抽取若干样本。这样能大大减小时间消耗。
- 如下所示循环:
for image in list1 :
predict= dress + '/' + image #预测的
x,category= main(predict)
#print(x,category,image)
if int(category) == int(image.split('_')[0]) :
print(category,image.split('_')[0],'True')
else :
print(category,image.split('_')[0],'F')
如果“标签”和所预测的结果相对应,就打印“True”,否则就打印“F”。
图 13 :预测图片
预测结果如下:
图 14 :预测分类结果
代码整理如下:
import os,shutil
import random
'''
# 1. rename
# The dataset is divided into 3 categories train,validation,test
# The gesture is converted to the corresponding label.
# A ~ Z and del nothing space
# 0 ~ 25 and 26 27 28
'''
def rename(filepath, kind):
    """Prefix every file in `filepath` with the class label `kind` plus '_'.

    E.g. with kind='3', 'hand.jpg' becomes '3_hand.jpg'.  The label prefix is
    what data_load later parses back out with filename.split('_')[0].

    The original implementation called os.chdir(filepath) (guarded by a flag)
    and renamed relative to the CWD — a process-wide side effect; this version
    renames with full paths and leaves the working directory alone.
    """
    for name in os.listdir(filepath):
        os.rename(os.path.join(filepath, name),
                  os.path.join(filepath, kind + '_' + name))
# 2. 分三训练 验证 测试集
# 2. Split each class folder into train / validation / test sets
def to_category(original_idx_dir, train_idx_dir, validation_idx_dir, test_idx_dir):
    """Randomly distribute the images of one class folder into three splits.

    The first 1200 (shuffled) files go to `train_idx_dir`, the next 200 to
    `validation_idx_dir`, and the remainder to `test_idx_dir` — i.e. a
    1200/200/100 split of the 1500 images per class.  (The original comments
    claimed 1000/300/200, which contradicted the code.)
    Files are copied, not moved; the source folder is left untouched.
    """
    names = os.listdir(original_idx_dir)
    # Shuffle so each split is a random sample, not alphabetical order.
    random.shuffle(names)
    for idx, fname in enumerate(names):
        if idx < 1200:
            dst_dir = train_idx_dir
        elif idx < 1400:
            dst_dir = validation_idx_dir
        else:
            dst_dir = test_idx_dir
        shutil.copyfile(os.path.join(original_idx_dir, fname),
                        os.path.join(dst_dir, fname))
if __name__ == '__main__':
    old_dir = "D:\\1\\ASL\\training_data\\"
    train_all = "D:\\1\\ASL\\1_2"
    validation_all = "D:\\1\\ASL\\2_2"
    test_all = "D:\\1\\ASL\\3_2"
    # All 29 class folders: A..Z plus the three word classes.  The original
    # looped over only the 26 letters and required manually editing a
    # commented-out `ASL = "nothing"` line to process the remaining three.
    class_names = [chr(ord('A') + i) for i in range(26)] + ['del', 'nothing', 'space']
    for ASL in class_names:
        to_category(old_dir + ASL,
                    train_all,
                    validation_all,
                    test_all)
# 3. 剪裁图片并变成灰色
import os
from PIL import Image
def convertjpg(file, jpgfile, outdir, width, height):
    """Convert one image to grayscale, resize it, and save it into `outdir`.

    `file` is the source directory, `jpgfile` the image file name.  Any
    failure (unreadable file, bad path) is printed and swallowed so a batch
    run continues — matching the original best-effort behavior.

    Fixes over the original: Image.open now sits INSIDE the try block it was
    clearly meant to be guarded by, and the `with` statement closes the file
    handle (PIL opens lazily and otherwise keeps it open).
    """
    try:
        with Image.open(os.path.join(file, jpgfile)) as img:
            # Grayscale first, then bilinear downscale — same order as before.
            new_jpg = img.convert('L').resize((width, height), Image.BILINEAR)
            new_jpg.save(os.path.join(outdir, os.path.basename(jpgfile)))
    except Exception as e:
        print(e)
# Batch step: shrink every image of the split to 32x32 and convert to gray.
input_file = 'D:/1/ASL/3_2'
output_file = 'D:/1/ASL/3_4'
for picture_name in os.listdir(input_file):
    convertjpg(input_file, picture_name, output_file, 32, 32)
    # Echo the processed file name as a progress indicator.
    print(picture_name)
# 4. 读入数据
from keras.utils import np_utils
import os
import numpy as np
from PIL import Image
def read_image1(filename, filet):
    """Load the image `filename` from directory `filet` as a (32, 32, 1) array.

    Assumes the image was already resized to 32x32 grayscale by convertjpg —
    hence the single channel in the reshape.
    The `with` statement closes the underlying file handle, which the
    original leaked (Image.open is lazy and keeps the file open).
    """
    with Image.open(filet + '/' + filename) as img:
        return np.array(img).reshape(32, 32, 1)
def _read_split(dirpath):
    """Read one split directory; return (list of image arrays, int label array).

    The label is the numeric prefix of each file name ('3_hand.jpg' -> 3),
    as written by rename().
    """
    names = os.listdir(dirpath)
    images = [read_image1(name, dirpath) for name in names]
    labels = np.array([int(name.split('_')[0]) for name in names])
    return images, labels

def data_load(filetrain, filetest):
    """Load the train and test splits, returning (x_train, x_test, y_train, y_test).

    Images become float32 arrays normalized to [0, 1]; labels are one-hot
    encoded.  The one-hot width is shared between the two splits: the
    original called to_categorical separately per split, so if one split
    happened to lack the highest class label the label matrices came out
    with different widths and model.evaluate crashed.
    """
    x_train, y_train = _read_split(filetrain)
    x_test, y_test = _read_split(filetest)
    # Shared class count across both splits keeps one-hot shapes consistent.
    num_classes = int(max(y_train.max(), y_test.max())) + 1
    y_train = np_utils.to_categorical(y_train, num_classes)
    y_test = np_utils.to_categorical(y_test, num_classes)
    # Normalize pixels from 0..255 to 0..1 for better-conditioned training;
    # lists must be converted to numpy arrays first.
    x_train = np.array(x_train).astype('float32') / 255
    x_test = np.array(x_test).astype('float32') / 255
    return x_train, x_test, y_train, y_test
# 5. 模型 训练 评估 画图 保存模型
from data_loading import data_load
from keras.models import Sequential
from keras.layers import Dense,Dropout,Flatten
from keras.optimizers import SGD
from keras.layers import Conv2D,MaxPool2D
import time
from keras import initializers
from matplotlib import pyplot as plt
# Load data: 1_4 holds the resized training images, 2_4 the validation images
# (the "test" variable names are actually used as the validation split).
filetrain = 'D:\\1\\ASL\\1_4'
filetest = 'D:\\1\\ASL\\2_4'
x_train, x_test, y_train, y_test = data_load(filetrain, filetest)
# ---------------------------------------------------------------------------
# Build the CNN: conv16 -> conv32 -> pool -> conv48 -> pool -> dense1024 -> softmax29
model = Sequential()
model.add(Conv2D(16, (5, 5),
                 input_shape=(32, 32, 1),  # 32x32 grayscale input
                 activation='relu',
                 kernel_initializer=initializers.random_normal(stddev=0.04, mean=0.00),
                 bias_initializer=initializers.Constant(value=0.2)))
# Second convolutional layer
model.add(Conv2D(32, (5, 5),
                 activation='relu',
                 kernel_initializer=initializers.random_normal(stddev=0.04, mean=0.00),
                 bias_initializer=initializers.Constant(value=0.2)))
model.add(MaxPool2D(pool_size=(2, 2)))
# Third convolutional layer
model.add(Conv2D(48, (4, 4),
                 activation='relu',
                 kernel_initializer=initializers.random_normal(stddev=0.04, mean=0.00),
                 bias_initializer=initializers.Constant(value=0.2)))
model.add(MaxPool2D(pool_size=(2, 2)))
# Flatten feature maps for the dense layers
model.add(Flatten())
# Fully connected layer; dropout guards against overfitting
model.add(Dense(1024, activation='relu',
                kernel_initializer=initializers.random_normal(stddev=0.02, mean=0.00),
                bias_initializer=initializers.Constant(value=0.1)))
model.add(Dropout(0.5))
# Output layer: 29 classes (A-Z, del, nothing, space)
model.add(Dense(29, activation='softmax',
                kernel_initializer=initializers.random_normal(stddev=0.02, mean=0.00),
                bias_initializer=initializers.Constant(value=0.1)))
# NOTE(review): the original code also constructed an SGD optimizer here but
# then compiled with 'adam', leaving the SGD instance dead; it was removed.
model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
# ---------------------------------------------------------------------------
# Train (batches of 50 images, 40 epochs) and time the run.
start = time.time()
history = model.fit(x_train, y_train, validation_data=(x_test, y_test), batch_size=50, epochs=40)
end = time.time()
print('time : ', end - start, ' s')
# Evaluate on the validation split
score = model.evaluate(x_test, y_test, batch_size=1)
print(score)
# ---------------------------------------------------------------------------
# Save the architecture and weights under the exact paths the prediction
# script loads.  The original wrote model1.json and a relative ASLwights1.h5,
# which the predictor (expecting model.json / D:/1/ASL/ASLwights.h5) never
# found.
model_json = model.to_json()
with open('D:/1/ASL/model.json', 'w') as file:
    file.write(model_json)
# With the architecture saved, prediction only needs to reload these weights.
model.save_weights('D:/1/ASL/ASLwights.h5', overwrite=True)
# ---------------------------------------------------------------------------
# Plot the training curves: accuracy, then loss.
plt.plot(history.history['accuracy'])
plt.plot(history.history['val_accuracy'])
plt.title('model accuracy')
plt.ylabel('accuracy')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()
plt.plot(history.history['loss'])
plt.plot(history.history['val_loss'])
plt.title('model loss')
plt.ylabel('loss')
plt.xlabel('epoch')
plt.legend(['train', 'validation'], loc='upper left')
plt.show()
# 6. 预测
import random
import os
from PIL import Image
import numpy as np
from keras.models import model_from_json
def prepicture(picname):
    """Load the image at path `picname` as a (32, 32, 1) grayscale array.

    Mirrors the training preprocessing (32x32, single channel).
    NOTE(review): this resizes BEFORE converting to gray, while convertjpg
    converts to gray before resizing — rounding can differ slightly; confirm
    this is intentional.
    The `with` statement closes the file handle the original leaked.
    """
    with Image.open(picname) as img:
        small = img.resize((32, 32), Image.BILINEAR).convert('L')
        return np.array(small).reshape(32, 32, 1)
def main(predict):
    """Classify the image at path `predict` with the saved model.

    Returns (trust, class_index): the highest softmax probability and the
    predicted class number (0-28).

    The model architecture and weights are loaded from disk only on the
    first call and cached on the function object — the original reloaded
    both files for EVERY image, dominating the time of the prediction loop.
    """
    x_test = np.array([prepicture(predict)]).astype('float32') / 255
    model = getattr(main, '_model', None)
    if model is None:
        # The architecture must be supplied before weights can be loaded.
        with open("D:\\1\\ASL\\model.json") as f:
            model = model_from_json(f.read())
        model.load_weights('D:/1/ASL/ASLwights.h5')
        main._model = model
    # Highest softmax activation over the 29-way output.
    trust = max(model.predict(x_test)[0])
    classes = model.predict_classes(x_test)
    return trust, classes[0]
if __name__ == '__main__':
    # Sample 10 images from the held-out test split and check each prediction
    # against the numeric label embedded in the file name.
    dress = "D:/1/ASL/3_2"  # test split directory
    list1 = random.sample(os.listdir(dress), 10)
    for image in list1:
        trust, category = main(dress + '/' + image)
        label = image.split('_')[0]
        verdict = 'True' if int(category) == int(label) else 'F'
        print(category, label, verdict)