使用肤色检测法和Lenet-5实现手势识别
数据集的制作
使用摄像头录制视频并用imwrite函数记录下来
def camo ():
    """Capture webcam frames and save every 5th preprocessed frame as a
    training image once recording is started with the 's' key.

    NOTE(review): relies on module-level globals `capture` (cv2.VideoCapture),
    `screenx`/`screeny` (window size) and the helper `train_pic` — confirm
    they are defined before this is called.
    """
    start = 0   # 0 = idle, 1 = recording (toggled by pressing 's')
    i = 0       # frame counter, used to sample every 5th frame
    count = 0   # number of images written so far
    while True:
        cv2.resizeWindow("camo", screenx, screeny)
        ret, frame = capture.read()  # read one frame from the camera
        train,_ = train_pic(frame)   # background-removed binary training image (40x40 per train_pic)
        cv2.imshow("train",train)
        if start==1 and i%5==0:  # after start was pressed, record one frame out of every five
            cv2.imwrite('./train/PRAY/pra_'+str(count)+'.jpg',train)
            print('write'+str(count)+'complite')
            count += 1
        i = i+1
        k = cv2.waitKey(10)
        if k == ord('s'):  # 's' starts recording
            start = 1
            print('start')
        if k == 27:  # press ESC to exit
            break
    cv2.destroyAllWindows()
获取原始图像后,再进行背景去除
def removeBG(frame):
    """Extract a binary skin mask from a BGR camera frame.

    Pipeline: MOG2 background subtraction -> Cr channel of YCrCb ->
    Gaussian blur -> OTSU binarization -> erode + dilate to denoise.
    Returns a single-channel uint8 mask (255 = skin/foreground).
    """
    # BUGFIX: the original created a new BackgroundSubtractorMOG2 on every
    # call, so each frame was the subtractor's "first" frame and no
    # background model was ever learned. Cache one instance across calls.
    if not hasattr(removeBG, "_fgbg"):
        removeBG._fgbg = cv2.createBackgroundSubtractorMOG2()
    fgmask = removeBG._fgbg.apply(frame)
    res = cv2.bitwise_and(frame, frame, mask=fgmask)
    # convert to YCrCb and keep only the Cr (red-difference) channel,
    # where skin tones cluster tightly
    ycrcb = cv2.cvtColor(res, cv2.COLOR_BGR2YCrCb)
    (_, cr, _) = cv2.split(ycrcb)
    cr1 = cv2.GaussianBlur(cr, (3, 3), 0)          # smooth before thresholding
    _, skin = cv2.threshold(cr1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)  # OTSU picks the threshold
    kernel = np.ones((3, 3), np.uint8)             # 3x3 structuring element
    erosion = cv2.erode(skin, kernel)              # remove speckle noise
    dilation = cv2.dilate(erosion, kernel)         # restore eroded skin area
    return dilation
得到图像:
找出最大轮廓并在原图标记主体:
def findcnts_and_box_point(closed):
    """Keep only the largest contour in a binary mask.

    Fills every contour except the largest with black, fills the largest
    with white, and returns:
      closed -- the cleaned-up mask (modified in place)
      box    -- integer corner coordinates of the min-area bounding rectangle
      cnt    -- the largest contour itself
    """
    # find all contours (assumes OpenCV 4.x two-value return)
    cnts, _ = cv2.findContours(
        closed.copy(),
        cv2.RETR_LIST,
        cv2.CHAIN_APPROX_SIMPLE)
    # index of the contour with the largest area
    area = [cv2.contourArea(c) for c in cnts]
    max_idx = np.argmax(area)
    # BUGFIX: the original two loops filled cnts[max_idx - 1] repeatedly
    # (instead of cnts[i]) and used ranges that skipped indices, so noise
    # contours were left in the mask. Fill every non-maximal contour black.
    for i in range(len(cnts)):
        if i != max_idx:
            cv2.fillConvexPoly(closed, cnts[i], 0)
    cv2.fillConvexPoly(closed, cnts[max_idx], 255)
    rect = cv2.minAreaRect(cnts[max_idx])
    box = cv2.boxPoints(rect)   # corner coords of the min-area rectangle
    box = np.int0(box)          # round to integer pixel coordinates
    return closed, box, cnts[max_idx]
将函数返回的 box 值在原图上标注出来,得到图像:
将最大轮廓填充为白色,其他区域填充为黑色
再使用剪裁函数,将原图和模板图片进行剪裁,叠加
def drawcnts_and_cut(original_img, box):
    """Annotate the bounding box on a copy of the image and crop it.

    Arguments:
    original_img -- source image (drawing happens on a copy — cv2.rectangle
                    mutates its argument)
    box -- 4 corner points of a rotated rectangle (from cv2.boxPoints)

    Returns (draw_img, crop_img): the annotated copy and a square crop of
    the boxed region, sized 1.2x the box height.
    """
    xs = [p[0] for p in box]
    ys = [p[1] for p in box]
    x1, x2 = min(xs), max(xs)
    y1, y2 = min(ys), max(ys)
    # ROBUSTNESS: minAreaRect corners can fall slightly outside the image;
    # a negative slice start would wrap around, so clamp to the image edge.
    x1 = max(int(x1), 0)
    y1 = max(int(y1), 0)
    side = int((y2 - y1) * 1.2)   # square crop, 20% taller than the box
    draw_img = cv2.rectangle(original_img.copy(), (x1, y1), (x2, y2), (0, 0, 255), 3)
    crop_img = original_img[y1:y1 + side, x1:x1 + side]
    return draw_img, crop_img
def reverse_color(img):
    """Return the photographic negative of an image: every pixel p -> 255 - p.

    Vectorized replacement for the original per-pixel Python double loop;
    NumPy computes the whole array in one pass. Also generalizes from
    strictly 2-D grayscale input to any array shape. Output dtype is uint8,
    matching the original's preallocated uint8 buffer.
    """
    return (255 - np.asarray(img)).astype(np.uint8)
def train_pic(img):
    """Build one 40x40 training image from a raw camera frame.

    Pipeline: skin/background mask -> keep largest contour -> crop the
    original frame and the mask at the same box -> invert the mask ->
    saturating-add it to the grayscale crop so background goes white.

    Returns (result, draw_img): the 40x40 training image and the original
    frame annotated with the detected bounding box.
    """
    close = removeBG(img)  # skin-color detection mask
    skin,box,maxcnt = findcnts_and_box_point(close)  # cleaned mask, box corners, largest contour
    cv2.imshow("mask",skin)
    draw_img, crop_img = drawcnts_and_cut(img,box)  # annotated frame + cropped region
    _,mask_cut = drawcnts_and_cut(skin,box)  # crop the mask at the same position
    cv2.imshow("cut",draw_img)
    if crop_img.size>0:
        crop_img = cv2.cvtColor(crop_img,cv2.COLOR_BGR2GRAY)
        # NOTE(review): the original comment said 64x64, but the actual
        # resize target is 40x40 (matching the network input).
        origin_64 = cv2.resize(crop_img,(40,40),interpolation= cv2.INTER_AREA)
        mask_64 = cv2.resize(mask_cut,(40,40),interpolation= cv2.INTER_AREA)
        re_mask_64 = reverse_color(mask_64)
        result = cv2.add(origin_64,re_mask_64)  # saturating add: background -> 255 (white)
    else :
        # degenerate/empty crop: fall back to the whole frame and full mask
        grey_img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
        origin_64 = cv2.resize(grey_img,(40,40),interpolation= cv2.INTER_AREA)
        mask_64 = cv2.resize(skin,(40,40),interpolation= cv2.INTER_AREA)
        re_mask_64 = reverse_color(mask_64)
        result = cv2.add(origin_64,re_mask_64)
    return result,draw_img
最后得到图片如下:
每个手势大概做200张原始图,使用数据增强代码增强到10000张
下载地址:链接:https://share.weiyun.com/KNxrxEau 密码:awkum8
模型的训练
首先读取图片数据集
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from numpy import *
def get_file(file_dir):
    """Collect image paths and integer labels for the 12 gesture classes.

    Expects `file_dir` to contain one sub-directory per class
    (ONE..TEN, OK, GOOD), mapped to labels 0..11 in that order.

    Returns (image_list, label_list), shuffled together.

    BUGFIX: the original built a shuffled `temp` array and then returned
    the *unshuffled* lists, so the shuffle had no effect; the shuffled
    result is now actually returned. The 12 copy-pasted loops are also
    collapsed into one loop over the class names.
    """
    # class sub-directory name -> numeric label (by position)
    classes = ['ONE', 'TWO', 'THREE', 'FOUR', 'FIVE', 'SIX',
               'SEVEN', 'EIGHT', 'NINE', 'TEN', 'OK', 'GOOD']
    image_list = []
    label_list = []
    for label, name in enumerate(classes):
        class_dir = file_dir + '/' + name
        for file in os.listdir(class_dir):
            image_list.append(class_dir + '/' + file)
            label_list.append(label)
    # shuffle paths and labels together (rows of a 2-column matrix)
    temp = np.array([image_list, label_list])
    temp = temp.transpose()
    np.random.shuffle(temp)
    image_list = list(temp[:, 0])
    label_list = [int(l) for l in temp[:, 1]]  # temp holds strings; restore ints
    return image_list, label_list
再把图片转为H5文件
def image_to_h5(X_dirs,Y):
    """Read every image in `X_dirs` as grayscale and write the stack plus
    the labels `Y` to dataset/data_notwhite.h5 (datasets 'X' and 'Y').

    The images are stored as a (num, 40, 40) array; the training code's
    load_dataset() reshapes to (num, 40, 40, 1) when it loads the file.
    """
    X = []
    for counter, dirs in enumerate(X_dirs, start=1):
        im = cv2.imread(dirs, 0)  # flag 0 = load as grayscale
        print("正在处理第%d张照片" % counter)
        X.append(np.asarray(im))  # PIL/ndarray -> matrix
    aa = np.array(X)
    # BUGFIX: the original called aa.reshape(num, 40, 40, 1) and discarded
    # the result (reshape is not in-place). The loader expects a 3-D array,
    # so the dead call is simply removed rather than assigned.
    print(aa.shape)
    # context manager guarantees the file is closed even on error
    with h5py.File("dataset//data_notwhite.h5", "w") as file:
        file.create_dataset('X', data=aa)
        file.create_dataset('Y', data=np.array(Y))
#test
# data = h5py.File("dataset//data.h5","r")
# X_data = data['X']
# print(X_data.shape)
# Y_data = data['Y']
# print(Y_data[123])
# image = Image.fromarray(X_data[123]) #矩阵转图片并显示
# image.show()
if __name__ == "__main__":
    # Build the .h5 dataset from the image folders, then spot-check one
    # sample by reading it back and displaying it.
    # NOTE(review): h5py and PIL's Image (and cv2, used inside image_to_h5)
    # are not imported in the code shown above — confirm they are imported
    # elsewhere in this script.
    train_dir = 'E:/hand_gesture_dataset'
    train, train_label = get_file(train_dir)
    image_to_h5(train, train_label)
    # test: read the file back and show sample 1235
    data = h5py.File("dataset//data_notwhite.h5","r")
    X_data = data['X']
    print(X_data.shape)
    Y_data = data['Y']
    print(Y_data[1235])
    image = Image.fromarray(X_data[1235])  # matrix -> PIL image for display
    image.show()
开始训练:
import h5py
import numpy as np
from sklearn.model_selection import train_test_split
from keras.utils import np_utils
import tensorflow as tf
import math
import time
import matplotlib.pyplot as plt
from tensorflow.python.framework import graph_util
import os

# Enumerate GPUs in PCI bus order and expose only GPUs 0 and 1 to TensorFlow.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ['CUDA_VISIBLE_DEVICES'] = "0,1"

# directory where checkpoints are written during training
train_dir = './model'
#load dataset
def load_dataset():
    """Load the gesture dataset from the .h5 file and split it 90/10.

    Returns (X_train, X_test, y_train, y_test): images scaled to [0, 1]
    with shape (n, 40, 40, 1), labels one-hot encoded to 14 classes.
    """
    h5 = h5py.File("dataset//data_notwhite.h5", "r")
    images = np.array(h5['X'])   # h5py Dataset -> plain ndarray
    labels = np.array(h5['Y'])
    n, _, _ = images.shape
    images = images.reshape(n, 40, 40, 1)
    print(type(images))
    X_train, X_test, y_train, y_test = train_test_split(
        images, labels, train_size=0.9, test_size=0.1, random_state=22)
    print(X_train.shape)
    # scale pixel values into [0, 1]
    X_train = X_train / 255.
    X_test = X_test / 255.
    # one-hot encode the labels (14 classes, matching the network's output layer)
    y_train = np_utils.to_categorical(y_train, num_classes=14)
    print(y_train.shape)
    y_test = np_utils.to_categorical(y_test, num_classes=14)
    print(y_test.shape)
    return X_train, X_test, y_train, y_test
def weight_variable(shape):
    """Create a weight Variable drawn from a truncated normal (stddev 0.1)."""
    tf.set_random_seed(1)  # fixed seed for reproducible initialization
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)
def bias_variable(shape):
    """Create a bias Variable initialized to all zeros."""
    zeros = tf.constant(0.0, shape=shape)
    return tf.Variable(zeros)
def conv2d(x, W):
    """2-D convolution with stride 1 and SAME padding (spatial size kept)."""
    unit_stride = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_stride, padding='SAME')
def max_pool_2x2(z):
    """2x2 max pooling with stride 2 — halves each spatial dimension."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(z, ksize=window, strides=window, padding='SAME')
def random_mini_batches(X, Y, mini_batch_size=100, seed=0):
    """Split (X, Y) into a list of shuffled mini-batches.

    Arguments:
    X -- data of shape (m, ...), first axis indexes examples
    Y -- labels of shape (m, num_classes), rows aligned with X
    mini_batch_size -- size of each mini-batch; the last one may be smaller
    seed -- shuffle seed, so each epoch can reshuffle reproducibly

    Returns:
    mini_batches -- list of (mini_batch_X, mini_batch_Y) tuples covering
    all m examples exactly once.

    Fixes vs. original: the docstring was copied from an unrelated course
    exercise and described the wrong shapes/labels; the per-call
    "shuffled done" print (emitted every epoch) and a no-op reshape of
    shuffled_Y were removed; the complete/partial batch partition is one
    stride loop instead of floor-division plus a tail special case.
    """
    m = X.shape[0]  # number of examples
    np.random.seed(seed)
    # shuffle X and Y with the same permutation so rows stay aligned
    permutation = np.random.permutation(m)
    shuffled_X = X[permutation]
    shuffled_Y = Y[permutation]
    mini_batches = []
    for start in range(0, m, mini_batch_size):
        end = start + mini_batch_size  # slice end past m is safely clipped
        mini_batches.append((shuffled_X[start:end], shuffled_Y[start:end]))
    return mini_batches
def learning_curve(train_acc, test_acc, stride=20):
    """Plot training vs. testing accuracy; x-axis is sample-index * stride."""
    steps = np.arange(0, len(train_acc) * stride, stride)
    plt.figure()
    plt.plot(steps, train_acc, color='r', label='Training acc')
    plt.plot(steps, test_acc, color='b', label='Testing acc')
    plt.legend()
    plt.show()
def cnn_model(X_train, y_train, X_test, y_test, keep_prob, lamda, num_epochs = 450, minibatch_size = 100):
    """Build and train a LeNet-5-style CNN for gesture classification.

    Architecture: conv5x5(32) -> maxpool -> conv5x5(64) -> maxpool ->
    FC(512) + dropout -> softmax(14).

    Arguments:
    keep_prob -- dropout keep probability used during training
    lamda -- L2 regularization strength for the FC weights
    num_epochs, minibatch_size -- training schedule

    Side effects: checkpoints to `train_dir` whenever test accuracy beats
    the best so far, saves a final ckpt plus a frozen .pb graph for the
    Android app, and plots the learning curve.
    """
    # placeholders; node names matter because the frozen graph is consumed elsewhere
    X = tf.placeholder(tf.float32, [None, 40, 40 , 1], name="input_x")
    y = tf.placeholder(tf.float32, [None, 14], name="input_y")
    kp = tf.placeholder_with_default(1.0, shape=(), name="keep_prob")  # 1.0 (no dropout) unless fed
    lam = tf.placeholder(tf.float32, name="lamda")
    # conv1: 5x5, 1 -> 32 channels
    W_conv1 = weight_variable([5,5,1,32])
    b_conv1 = bias_variable([32])
    z1 = tf.nn.relu(conv2d(X, W_conv1) + b_conv1)
    maxpool1 = max_pool_2x2(z1)  # -> [?, 20, 20, 32]
    # conv2: 5x5, 32 -> 64 channels
    W_conv2 = weight_variable([5,5,32,64])
    b_conv2 = bias_variable([64])
    z2 = tf.nn.relu(conv2d(maxpool1, W_conv2) + b_conv2)
    maxpool2 = max_pool_2x2(z2)  # -> [?, 10, 10, 64]
    # (a third conv layer was tried and removed; per the author, two conv
    # layers with 100 hidden units and 20 epochs worked best)
    # fully connected layer 1
    W_fc1 = weight_variable([10*10*64, 512])
    b_fc1 = bias_variable([512])
    maxpool2_flat = tf.reshape(maxpool2, [-1, 10*10*64])
    z_fc1 = tf.nn.relu(tf.matmul(maxpool2_flat, W_fc1) + b_fc1)
    z_fc1_drop = tf.nn.dropout(z_fc1, keep_prob=kp)
    # softmax output layer
    # NOTE(review): 14 output classes, but get_file only generates labels
    # 0-11 — confirm whether the extra 2 classes are intentional.
    W_fc2 = weight_variable([512, 14])
    b_fc2 = bias_variable([14])
    z_fc2 = tf.add(tf.matmul(z_fc1_drop, W_fc2),b_fc2, name="outlayer")
    prob = tf.nn.softmax(z_fc2, name="probability")
    # cost: softmax cross-entropy + L2 regularization on the FC weights
    regularizer = tf.contrib.layers.l2_regularizer(lam)
    regularization = regularizer(W_fc1) + regularizer(W_fc2)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=z_fc2)) + regularization
    train = tf.train.AdamOptimizer().minimize(cost)
    # node named "predict" so the frozen-graph export below can find it
    pred = tf.argmax(prob, 1, output_type="int32", name="predict")
    correct_prediction = tf.equal(pred, tf.argmax(y, 1, output_type='int32'))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.set_random_seed(1)  # keep results reproducible
    seed = 0
    acc = 0.97             # checkpoint only when test accuracy beats this
    train_accs = []
    test_accs = []
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        step = 0
        for epoch in range(num_epochs):
            seed = seed + 1  # different shuffle each epoch
            epoch_cost = 0.
            num_minibatches = int(X_train.shape[0] / minibatch_size)
            minibatches = random_mini_batches(X_train, y_train, minibatch_size, seed)
            minibatchesTest = random_mini_batches(X_test,y_test,minibatch_size,seed)
            test_i = 0
            for minibatch in minibatches:
                (minibatch_X, minibatch_Y) = minibatch
                _, minibatch_cost = sess.run([train, cost], feed_dict={X: minibatch_X, y: minibatch_Y, kp: keep_prob, lam: lamda})
                epoch_cost += minibatch_cost / num_minibatches
                step = step+1
                # every 20 steps: evaluate one test mini-batch, maybe checkpoint
                if(step % 20 == 0):
                    (minibatchtest_X, minibatchtest_Y) = minibatchesTest[test_i]
                    test_i = test_i + 1
                    test_acc = accuracy.eval(feed_dict={X: minibatchtest_X, y: minibatchtest_Y, lam: lamda})
                    train_acc = accuracy.eval(feed_dict={X: minibatch_X, y: minibatch_Y, lam: lamda})
                    train_accs.append(train_acc)
                    test_accs.append(test_acc)
                    print("test accuracy", test_acc)
                    print("cost", minibatch_cost)
                    if test_acc>acc:
                        # new best test accuracy: save a checkpoint of all weights
                        acc = test_acc
                        saver = tf.train.Saver({'W_conv1':W_conv1, 'b_conv1':b_conv1, 'W_conv2':W_conv2, 'b_conv2':b_conv2,
                                'W_fc1':W_fc1, 'b_fc1':b_fc1, 'W_fc2':W_fc2, 'b_fc2':b_fc2})
                        checkpoint_path = os.path.join(train_dir, 'thing.ckpt')
                        saver.save(sess, checkpoint_path ,global_step=step)
            print("Cost after epoch %i: %f" % (epoch, epoch_cost))
            print(str((time.strftime('%Y-%m-%d %H:%M:%S'))))
        # final evaluation on the first 1000 train/test examples
        # (tensor.eval() and Session.run() are near-equivalent here)
        # NOTE(review): kp: 0.8 keeps dropout ACTIVE during this evaluation,
        # which biases the reported train accuracy — likely should be 1.0.
        train_acc = accuracy.eval(feed_dict={X: X_train[:1000], y: y_train[:1000], kp: 0.8, lam: lamda})
        print("train accuracy", train_acc)
        test_acc = accuracy.eval(feed_dict={X: X_test[:1000], y: y_test[:1000], lam: lamda})
        print("test accuracy", test_acc)
        # save the final model as a checkpoint
        saver = tf.train.Saver({'W_conv1':W_conv1, 'b_conv1':b_conv1, 'W_conv2':W_conv2, 'b_conv2':b_conv2,
                'W_fc1':W_fc1, 'b_fc1':b_fc1, 'W_fc2':W_fc2, 'b_fc2':b_fc2})
        saver.save(sess, "model//cnn_model.ckpt")
        # freeze the trained graph to a .pb file for use from Android Studio
        output_graph_def = graph_util.convert_variables_to_constants(sess, sess.graph_def, output_node_names=['predict'])
        with tf.gfile.FastGFile('model//digital_gesture.pb', mode='wb') as f:  # 'wb' = write binary
            f.write(output_graph_def.SerializeToString())
        learning_curve(train_accs,test_accs, 20)
if __name__ == "__main__":
    # Train the gesture CNN end to end, timestamping each phase.
    print("载入数据集: " + str((time.strftime('%Y-%m-%d %H:%M:%S'))))
    X_train, X_test, y_train, y_test = load_dataset()
    print("开始训练: " + str((time.strftime('%Y-%m-%d %H:%M:%S'))))
    # keep_prob=0.7, L2 lambda=0.01; only 2 epochs here (short demo run,
    # the default in cnn_model is 450)
    cnn_model(X_train, y_train, X_test, y_test, 0.7, 0.01, num_epochs=2, minibatch_size=80)
    print("训练结束: " + str((time.strftime('%Y-%m-%d %H:%M:%S'))))