使用keras进行手势识别-踩坑

使用keras进行CNN手势识别

这种手势图片
在这里插入图片描述

第一次使用简单CNN识别,准确率只有50%。无论是修改参数,还是改用resnet残差网络学习,准确率还是上不去。

在这里插入图片描述
可以看到,到第5个epoch的时候,准确率和Loss已经不再变化了,找了好几天也不知道是什么问题。
下面是验证代码

# -*- coding: utf-8 -*-
"""
Created on Tue Jan 12 14:44:47 2021

@author: yuyanchuan
"""

import os
import pandas as pd
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
import cv2
import numpy as np
from keras.preprocessing.image import img_to_array
import matplotlib.pyplot as plt
from sklearn.preprocessing import normalize

batch_size = 160 # samples per training batch
num_classes = 12 # 12 classes: 0-5 fingers for each of the left/right hands
epochs = 20 # number of training epochs
data_augmentation = True
num_predictions = 20
save_dir = os.path.join(os.getcwd(), 'saved_models') # directory where the trained model is saved
model_name = 'keras_cifar10_trained_model.h5' # file name of the saved model


# Dataset locations (Windows paths to the "fingers" hand-gesture image set).
path=r'F:\code\Pythoncode\dataset\hands\fingers\train'
testPath=r'F:\code\Pythoncode\dataset\hands\fingers\test'

# Maps the two-character label suffix of a file name (e.g. "3L", "5R") to a
# class index: "0L".."5L" -> 0..5 (left hand), "0R".."5R" -> 6..11 (right hand).
_LABEL_TO_CLASS = {
    '0L': 0, '1L': 1, '2L': 2, '3L': 3, '4L': 4, '5L': 5,
    '0R': 6, '1R': 7, '2R': 8, '3R': 9, '4R': 10, '5R': 11,
}

def getLabel(label):
    """Return the class index (0-11) for a label string such as '3L' or '5R'.

    Unknown labels fall back to 0, matching the default `result=0` of the
    original if/elif chain.
    """
    return _LABEL_TO_CLASS.get(label, 0)

def getData(filepath):
    """Load every image under *filepath*, resized to 32x32.

    The class label is derived from characters [-6:-4] of each file name
    (e.g. "..._3L.png" -> "3L") and mapped to an int via getLabel().

    Returns:
        (images, labels): parallel lists of 32x32 BGR images and int labels.
    """
    images = []
    labels = []
    for name in os.listdir(filepath):
        print(name)
        picture = cv2.imread(os.path.join(filepath, name))
        picture = cv2.resize(picture, (32, 32))
        images.append(picture)
        labels.append(getLabel(name[-6:-4]))
    return images, labels

# Load the training images/labels from disk (getData resizes each to 32x32).
trainx,trainy=getData(path)

# Cache the raw arrays so later runs can np.load() instead of re-reading images.
np.save("trainx.npy",trainx)
np.save("trainy.npy",trainy)

# Load and cache the test set the same way.
testx,testy=getData(testPath)
np.save("testx.npy",testx)
np.save("testy.npy",testy)


trainx=np.array(trainx)
print("trainx shape:",trainx.shape)
trainy=np.array(trainy)
# Reshape labels to an (N, 1) column before one-hot encoding.
trainy.resize((len(trainy),1))
print('trainy shape:',trainy.shape)

testx=np.array(testx)
# NOTE(review): test labels are cast to float but train labels are not —
# harmless for to_categorical, yet inconsistent.
testy=np.array(testy,dtype="float")
testy.resize((len(testy),1))


  
# One-hot encode the integer labels into num_classes columns.
trainy = keras.utils.to_categorical(trainy, num_classes)
testy = keras.utils.to_categorical(testy, num_classes)
 

# Scale pixel values from [0, 255] to [0, 1].
trainx = trainx.astype('float32')
testx = testx.astype('float32')
trainx /= 255
testx /= 255
 
chanDim=-1

# NOTE(review): sklearn's normalize() is documented for 2-D feature matrices;
# applying it with axis=-1 to these 4-D image tensors is not a supported use
# and rescales each pixel's channel values, distorting the images. The article
# itself identifies this extra normalization as the cause of the stuck ~50%
# accuracy — the later working script omits these two calls. Verify before reuse.
trainx=normalize(trainx,axis=-1,norm='max')
testx=normalize(testx,axis=-1,norm='max')

def cnn_model(num_classes, chan_dim=-1):
    """Build and compile a small VGG-style CNN for 32x32 RGB images.

    Args:
        num_classes: number of softmax output units.
        chan_dim: channel axis for BatchNormalization; the default -1
            matches the channels-last input_shape=(32, 32, 3). (The
            original mixed hardcoded -1 with a module-global `chanDim`;
            this parameter unifies them, preserving behavior.)

    Returns:
        A compiled keras Sequential model (SGD optimizer, categorical
        cross-entropy loss, accuracy metric). Prints a model summary
        as a side effect.
    """
    model = Sequential()

    # CONV => RELU => BN => POOL
    model.add(Conv2D(32, (3, 3), padding="same", input_shape=(32, 32, 3)))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chan_dim))
    model.add(MaxPooling2D(pool_size=(3, 3)))
    model.add(Dropout(0.25))

    # (CONV => RELU => BN) * 2 => POOL
    model.add(Conv2D(64, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chan_dim))
    model.add(Conv2D(128, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chan_dim))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # (CONV => RELU => BN) * 2 => POOL
    model.add(Conv2D(128, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chan_dim))
    model.add(Conv2D(128, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chan_dim))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # FC => RELU head.
    model.add(Flatten())
    model.add(Dense(1000))
    model.add(Activation("relu"))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))

    # Softmax output for single-label classification.
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # NOTE(review): the original comments mentioned RMSprop, but the optimizer
    # actually used is plain SGD; kept unchanged to preserve behavior.
    opt = keras.optimizers.SGD(lr=0.02, momentum=0, decay=0.0001, nesterov=False)

    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
    model.summary()
    return model

model=cnn_model(12)
# Real-time data augmentation configuration.
datagen = ImageDataGenerator(
        featurewise_center=False,  # set input mean to 0 over the dataset
        samplewise_center=False,  # set each sample mean to 0
        featurewise_std_normalization=False,  # divide inputs by std of the dataset
        samplewise_std_normalization=False,  # divide each input by its std
        zca_whitening=False,  # apply ZCA (Zero-phase Component Analysis) whitening
        rotation_range=0,  # randomly rotate images in the range (degrees, 0 to 180)
        width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
        height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
        horizontal_flip=True,  # randomly flip images horizontally
        vertical_flip=False)  # never flip images vertically
 
    # Compute quantities required for feature-wise normalization
    # (std, mean, and principal components if ZCA whitening is applied).
datagen.fit(trainx)
 
    # Fit the model on the batches generated by datagen.flow().
    # NOTE(review): fit_generator is deprecated in newer Keras releases in
    # favor of model.fit(generator, ...) — confirm against the installed version.
H=model.fit_generator(datagen.flow(trainx, trainy,
                                     batch_size=batch_size),
                        epochs=epochs,
                        validation_data=(testx, testy),
                        workers=8)

# Save the trained model, creating the output directory if needed.
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
print('Saved trained model at %s ' % model_path)
 
# Score the trained model on the held-out test set.
scores = model.evaluate(testx, testy, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])


# Plot the training/validation loss and accuracy curves over all epochs.
plt.style.use("ggplot")
plt.figure()
N = epochs
plt.plot(np.arange(0, N), H.history["loss"], label="train_loss")
plt.plot(np.arange(0, N), H.history["val_loss"], label="val_loss")
plt.plot(np.arange(0, N), H.history["accuracy"], label="train_acc")
plt.plot(np.arange(0, N), H.history["val_accuracy"], label="val_acc")
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="upper left")
plt.show()

思考了两天后,我想这个数据集和MNIST数据集很像,是不是我数据预处理有问题?

数据预处理整理

# Reload the cached training arrays saved by the data-loading script.
trainx=np.load("trainx.npy")
trainy=np.load("trainy.npy")

testx=np.load("testx.npy")
testy=np.load("testy.npy")



trainx=np.array(trainx)
trainy=np.array(trainy)
testx=np.array(testx)
testy=np.array(testy)

print(trainx.shape)
print(testx.shape)

  
# One-hot encode the integer labels; note there is deliberately no extra
# resize/normalize step here — dropping it is the fix described in the article.
trainy = keras.utils.to_categorical(trainy, num_classes)
testy = keras.utils.to_categorical(testy, num_classes)
 
print(trainy[:5])

没有使用之前的resize,直接就使用了np.array。
这次发现准确率可以了,直接就提高到99%
在这里插入图片描述
下面是全部代码

# -*- coding: utf-8 -*-
"""
Created on Thu Jan 14 08:55:41 2021

@author: yuyanchuan
"""

# -*- coding: utf-8 -*-
"""
Created on Tue Jan 12 14:44:47 2021

@author: yuyanchuan
"""

import os
import pandas as pd
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
import cv2
import numpy as np
from keras.preprocessing.image import img_to_array
import matplotlib.pyplot as plt
from sklearn.preprocessing import normalize

batch_size = 320 # samples per training batch
num_classes = 12 # 12 classes: 0-5 fingers for each of the left/right hands
epochs = 10 # number of training epochs
data_augmentation = True
num_predictions = 20
save_dir = os.path.join(os.getcwd(), 'saved_models') # directory where the trained model is saved
model_name = 'otherCnnTest.h5' # file name of the saved model

# Reload the cached training arrays saved by the data-loading script.
trainx=np.load("trainx.npy")
trainy=np.load("trainy.npy")

testx=np.load("testx.npy")
testy=np.load("testy.npy")



trainx=np.array(trainx)
trainy=np.array(trainy)
testx=np.array(testx)
testy=np.array(testy)

print(trainx.shape)
print(testx.shape)

  
# One-hot encode the integer labels; no extra resize/normalize is applied —
# dropping that preprocessing is what fixed the stuck accuracy.
trainy = keras.utils.to_categorical(trainy, num_classes)
testy = keras.utils.to_categorical(testy, num_classes)
 
print(trainy[:5])


chanDim=-1
def cnn_model(num_classes, chan_dim=-1):
    """Build and compile a small VGG-style CNN for 32x32 RGB images.

    Args:
        num_classes: number of softmax output units.
        chan_dim: channel axis for BatchNormalization; the default -1
            matches the channels-last input_shape=(32, 32, 3). (The
            original mixed hardcoded -1 with a module-global `chanDim`;
            this parameter unifies them, preserving behavior.)

    Returns:
        A compiled keras Sequential model (SGD optimizer, categorical
        cross-entropy loss, accuracy metric). Prints a model summary
        as a side effect.
    """
    model = Sequential()

    # CONV => RELU => BN => POOL
    model.add(Conv2D(32, (3, 3), padding="same", input_shape=(32, 32, 3)))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chan_dim))
    model.add(MaxPooling2D(pool_size=(3, 3)))
    model.add(Dropout(0.25))

    # (CONV => RELU => BN) * 2 => POOL
    model.add(Conv2D(64, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chan_dim))
    model.add(Conv2D(128, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chan_dim))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # (CONV => RELU => BN) * 2 => POOL
    model.add(Conv2D(128, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chan_dim))
    model.add(Conv2D(128, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chan_dim))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # FC => RELU head.
    model.add(Flatten())
    model.add(Dense(1000))
    model.add(Activation("relu"))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))

    # Softmax output for single-label classification.
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))

    # NOTE(review): the original comments mentioned RMSprop, but the optimizer
    # actually used is plain SGD; kept unchanged to preserve behavior.
    opt = keras.optimizers.SGD(lr=0.02, momentum=0, decay=0.0001, nesterov=False)

    model.compile(loss='categorical_crossentropy',
                  optimizer=opt,
                  metrics=['accuracy'])
    model.summary()
    return model

model=cnn_model(12)
H=model.fit(trainx,trainy,batch_size=batch_size,epochs=epochs,verbose=1,validation_data=(testx,testy))# train for `epochs` rounds, logging each epoch

# Evaluate on the held-out test set.
score=model.evaluate(testx,testy,verbose=0)
print('accuracy:'+str(score[1]))

# Save the trained model, creating the output directory if needed.
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
model_path = os.path.join(save_dir, model_name)
model.save(model_path)
print('Saved trained model at %s ' % model_path)

# Plot the training/validation loss and accuracy curves over all epochs.
plt.style.use("ggplot")
plt.figure()
N = epochs
plt.plot(np.arange(0, N), H.history["loss"], label="train_loss")
plt.plot(np.arange(0, N), H.history["val_loss"], label="val_loss")
plt.plot(np.arange(0, N), H.history["accuracy"], label="train_acc")
plt.plot(np.arange(0, N), H.history["val_accuracy"], label="val_acc")
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="upper left")
plt.show()

总结

数据预处理很重要,一定要得到满足网络需要的数据,但是也不要乱处理,导致数据不正确,影响预测结果

使用预测

在这里插入图片描述

# -*- coding: utf-8 -*-
"""
Created on Thu Jan 14 09:21:41 2021

@author: yuyanchuan
"""

import os
import pandas as pd
import keras
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential,load_model
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Conv2D, MaxPooling2D
from keras.layers.normalization import BatchNormalization
import cv2
import numpy as np
from keras.preprocessing.image import img_to_array
import matplotlib.pyplot as plt
from sklearn.preprocessing import normalize



save_dir = os.path.join(os.getcwd(), 'saved_models') # directory holding the trained model
model_name = 'otherCnnTest.h5' # file name of the saved model

model_path = os.path.join(save_dir, model_name)

# Load the previously trained CNN from disk.
model=load_model(model_path)

# Read one sample image and resize it to the 32x32 input the model expects.
predImg=cv2.imread('3l.png')
predImg=cv2.resize(predImg,(32,32))

predImg=np.array(predImg)

print(predImg.shape)
# Add the batch dimension in place: (32, 32, 3) -> (1, 32, 32, 3).
predImg.shape=(1,32,32,3)


# NOTE(review): the training script fed raw 0-255 pixel values, so no /255
# scaling is applied here either — keep the two consistent if either changes.
pred=model.predict(predImg)

predict = np.argmax(pred,axis=1)  # axis=1 takes the per-row argmax, i.e. the class index for each sample
print(pred)
print(predict)

输出
[[5.6554053e-11 5.6276331e-11 1.3570735e-07 9.9999905e-01 8.4789518e-09
2.0978609e-11 1.6956882e-11 2.8840927e-12 8.5878776e-11 8.9162802e-07
1.1190726e-11 4.4292525e-11]]
[3]

3对应的就是3L,左手3
但是如果预测其他彩色图片,还是不准,一直预测的是5,还是需要增加其他数据集进行训练才行。

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值