使用肤色检测法和Lenet-5实现手势识别
数据集的制作
使用摄像头录制视频并用imwrite函数记录下来
def camo ():
    """Capture webcam frames and save every 5th preprocessed frame as a
    training image once recording is started with the 's' key.

    NOTE(review): relies on module-level globals `capture` (cv2.VideoCapture),
    `screenx`/`screeny` (window size) and the helper `train_pic` — confirm
    they are defined before this is called.
    """
    start = 0   # 0 = idle, 1 = recording (toggled by pressing 's')
    i = 0       # frame counter, used to sample every 5th frame
    count = 0   # number of images written so far
    while True:
        cv2.resizeWindow("camo", screenx, screeny)
        ret, frame = capture.read()  # read one frame from the camera
        train,_ = train_pic(frame)   # background-removed binary training image (40x40 per train_pic)
        cv2.imshow("train",train)
        if start==1 and i%5==0:  # after start was pressed, record one frame out of every five
            cv2.imwrite('./train/PRAY/pra_'+str(count)+'.jpg',train)
            print('write'+str(count)+'complite')
            count += 1
        i = i+1
        k = cv2.waitKey(10)
        if k == ord('s'):  # 's' starts recording
            start = 1
            print('start')
        if k == 27:  # press ESC to exit
            break
    cv2.destroyAllWindows()
获取原始图像后,再进行背景去除
def removeBG(frame):
    """Extract a binary skin mask from a BGR camera frame.

    Pipeline: MOG2 background subtraction -> Cr channel of YCrCb ->
    Gaussian blur -> OTSU binarization -> erode + dilate to denoise.
    Returns a single-channel uint8 mask (255 = skin/foreground).
    """
    # BUGFIX: the original created a new BackgroundSubtractorMOG2 on every
    # call, so each frame was the subtractor's "first" frame and no
    # background model was ever learned. Cache one instance across calls.
    if not hasattr(removeBG, "_fgbg"):
        removeBG._fgbg = cv2.createBackgroundSubtractorMOG2()
    fgmask = removeBG._fgbg.apply(frame)
    res = cv2.bitwise_and(frame, frame, mask=fgmask)
    # convert to YCrCb and keep only the Cr (red-difference) channel,
    # where skin tones cluster tightly
    ycrcb = cv2.cvtColor(res, cv2.COLOR_BGR2YCrCb)
    (_, cr, _) = cv2.split(ycrcb)
    cr1 = cv2.GaussianBlur(cr, (3, 3), 0)          # smooth before thresholding
    _, skin = cv2.threshold(cr1, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)  # OTSU picks the threshold
    kernel = np.ones((3, 3), np.uint8)             # 3x3 structuring element
    erosion = cv2.erode(skin, kernel)              # remove speckle noise
    dilation = cv2.dilate(erosion, kernel)         # restore eroded skin area
    return dilation
得到图像:
找出最大轮廓并在原图标记主体:
def findcnts_and_box_point(closed):
    """Keep only the largest contour in a binary mask.

    Fills every contour except the largest with black, fills the largest
    with white, and returns:
      closed -- the cleaned-up mask (modified in place)
      box    -- integer corner coordinates of the min-area bounding rectangle
      cnt    -- the largest contour itself
    """
    # find all contours (assumes OpenCV 4.x two-value return)
    cnts, _ = cv2.findContours(
        closed.copy(),
        cv2.RETR_LIST,
        cv2.CHAIN_APPROX_SIMPLE)
    # index of the contour with the largest area
    area = [cv2.contourArea(c) for c in cnts]
    max_idx = np.argmax(area)
    # BUGFIX: the original two loops filled cnts[max_idx - 1] repeatedly
    # (instead of cnts[i]) and used ranges that skipped indices, so noise
    # contours were left in the mask. Fill every non-maximal contour black.
    for i in range(len(cnts)):
        if i != max_idx:
            cv2.fillConvexPoly(closed, cnts[i], 0)
    cv2.fillConvexPoly(closed, cnts[max_idx], 255)
    rect = cv2.minAreaRect(cnts[max_idx])
    box = cv2.boxPoints(rect)   # corner coords of the min-area rectangle
    box = np.int0(box)          # round to integer pixel coordinates
    return closed, box, cnts[max_idx]
将函数返回的 box 值在原图上标注出来,得到图像:
将最大轮廓填充为白色,其他区域填充为黑色
再使用剪裁函数,将原图和模板图片进行剪裁,叠加
def drawcnts_and_cut(original_img, box):
    """Annotate the bounding box on a copy of the image and crop it.

    Arguments:
    original_img -- source image (drawing happens on a copy — cv2.rectangle
                    mutates its argument)
    box -- 4 corner points of a rotated rectangle (from cv2.boxPoints)

    Returns (draw_img, crop_img): the annotated copy and a square crop of
    the boxed region, sized 1.2x the box height.
    """
    xs = [p[0] for p in box]
    ys = [p[1] for p in box]
    x1, x2 = min(xs), max(xs)
    y1, y2 = min(ys), max(ys)
    # ROBUSTNESS: minAreaRect corners can fall slightly outside the image;
    # a negative slice start would wrap around, so clamp to the image edge.
    x1 = max(int(x1), 0)
    y1 = max(int(y1), 0)
    side = int((y2 - y1) * 1.2)   # square crop, 20% taller than the box
    draw_img = cv2.rectangle(original_img.copy(), (x1, y1), (x2, y2), (0, 0, 255), 3)
    crop_img = original_img[y1:y1 + side, x1:x1 + side]
    return draw_img, crop_img
def reverse_color(img):
    """Return the photographic negative of an image: every pixel p -> 255 - p.

    Vectorized replacement for the original per-pixel Python double loop;
    NumPy computes the whole array in one pass. Also generalizes from
    strictly 2-D grayscale input to any array shape. Output dtype is uint8,
    matching the original's preallocated uint8 buffer.
    """
    return (255 - np.asarray(img)).astype(np.uint8)
def train_pic(img):
    """Build one 40x40 training image from a raw camera frame.

    Pipeline: skin/background mask -> keep largest contour -> crop the
    original frame and the mask at the same box -> invert the mask ->
    saturating-add it to the grayscale crop so background goes white.

    Returns (result, draw_img): the 40x40 training image and the original
    frame annotated with the detected bounding box.
    """
    close = removeBG(img)  # skin-color detection mask
    skin,box,maxcnt = findcnts_and_box_point(close)  # cleaned mask, box corners, largest contour
    cv2.imshow("mask",skin)
    draw_img, crop_img = drawcnts_and_cut(img,box)  # annotated frame + cropped region
    _,mask_cut = drawcnts_and_cut(skin,box)  # crop the mask at the same position
    cv2.imshow("cut",draw_img)
    if crop_img.size>0:
        crop_img = cv2.cvtColor(crop_img,cv2.COLOR_BGR2GRAY)
        # NOTE(review): the original comment said 64x64, but the actual
        # resize target is 40x40 (matching the network input).
        origin_64 = cv2.resize(crop_img,(40,40),interpolation= cv2.INTER_AREA)
        mask_64 = cv2.resize(mask_cut,(40,40),interpolation= cv2.INTER_AREA)
        re_mask_64 = reverse_color(mask_64)
        result = cv2.add(origin_64,re_mask_64)  # saturating add: background -> 255 (white)
    else :
        # degenerate/empty crop: fall back to the whole frame and full mask
        grey_img = cv2.cvtColor(img,cv2.COLOR_BGR2GRAY)
        origin_64 = cv2.resize(grey_img,(40,40),interpolation= cv2.INTER_AREA)
        mask_64 = cv2.resize(skin,(40,40),interpolation= cv2.INTER_AREA)
        re_mask_64 = reverse_color(mask_64)
        result = cv2.add(origin_64,re_mask_64)
    return result,draw_img
最后得到图片如下:
每个手势大概做200张原始图,使用数据增强代码增强到10000张
下载地址:链接:https://share.weiyun.com/KNxrxEau 密码:awkum8
模型的训练
首先读取图片数据集
import os
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
from numpy import *
def get_file(file_dir):
    """Collect image paths and integer labels for the 12 gesture classes.

    Expects `file_dir` to contain one sub-directory per class
    (ONE..TEN, OK, GOOD), mapped to labels 0..11 in that order.

    Returns (image_list, label_list), shuffled together.

    BUGFIX: the original built a shuffled `temp` array and then returned
    the *unshuffled* lists, so the shuffle had no effect; the shuffled
    result is now actually returned. The 12 copy-pasted loops are also
    collapsed into one loop over the class names.
    """
    # class sub-directory name -> numeric label (by position)
    classes = ['ONE', 'TWO', 'THREE', 'FOUR', 'FIVE', 'SIX',
               'SEVEN', 'EIGHT', 'NINE', 'TEN', 'OK', 'GOOD']
    image_list = []
    label_list = []
    for label, name in enumerate(classes):
        class_dir = file_dir + '/' + name
        for file in os.listdir(class_dir):
            image_list.append(class_dir + '/' + file)
            label_list.append(label)
    # shuffle paths and labels together (rows of a 2-column matrix)
    temp = np.array([image_list, label_list])
    temp = temp.transpose()
    np.random.shuffle(temp)
    image_list = list(temp[:, 0])
    label_list = [int(l) for l in temp[:, 1]]  # temp holds strings; restore ints
    return image_list, label_list
再把图片转为H5文件
def image_to_h5(X_dirs,Y):
    """Read every image in `X_dirs` as grayscale and write the stack plus
    the labels `Y` to dataset/data_notwhite.h5 (datasets 'X' and 'Y').

    The images are stored as a (num, 40, 40) array; the training code's
    load_dataset() reshapes to (num, 40, 40, 1) when it loads the file.
    """
    X = []
    for counter, dirs in enumerate(X_dirs, start=1):
        im = cv2.imread(dirs, 0)  # flag 0 = load as grayscale
        print("正在处理第%d张照片" % counter)
        X.append(np.asarray(im))  # PIL/ndarray -> matrix
    aa = np.array(X)
    # BUGFIX: the original called aa.reshape(num, 40, 40, 1) and discarded
    # the result (reshape is not in-place). The loader expects a 3-D array,
    # so the dead call is simply removed rather than assigned.
    print(aa.shape)
    # context manager guarantees the file is closed even on error
    with h5py.File("dataset//data_notwhite.h5", "w") as file:
        file.create_dataset('X', data=aa)
        file.create_dataset('Y', data=np.array(Y))
#test
# data = h5py.File("dataset//data.h5","r")
# X_data = data['X']
# print(X_data.shape)
# Y_data = data['Y']
# print(Y_data[123])
# image = Image.fromarray(X_data[123]) #矩阵转图片并显示
# image.show()
if __name__ == "__main__":
    # Build the .h5 dataset from the image folders, then spot-check one
    # sample by reading it back and displaying it.
    # NOTE(review): h5py and PIL's Image (and cv2, used inside image_to_h5)
    # are not imported in the code shown above — confirm they are imported
    # elsewhere in this script.
    train_dir = 'E:/hand_gesture_dataset'
    train, train_label = get_file(train_dir)
    image_to_h5(train, train_label)
    # test: read the file back and show sample 1235
    data = h5py.File("dataset//data_notwhite.h5","r")
    X_data = data['X']
    print(X_data.shape)
    Y_data = data['Y']
    print(Y_data[1235])
    image = Image.fromarray(X_data[1235])  # matrix -> PIL image for display
    image.show()
开始训练:
import h5py
import numpy as np
from sklearn.model_selection import train_test_split
from keras.utils import np_utils
import tensorflow as tf
import math
import time
import matplotlib.pyplot as plt
from tensorflow.python.framework import graph_util
import os

# Enumerate GPUs in PCI bus order and expose only GPUs 0 and 1 to TensorFlow.
os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
os.environ['CUDA_VISIBLE_DEVICES'] = "0,1"

# directory where checkpoints are written during training
train_dir = './model'
#load dataset
def load_dataset():
    """Load the gesture dataset from the .h5 file and split it 90/10.

    Returns (X_train, X_test, y_train, y_test): images scaled to [0, 1]
    with shape (n, 40, 40, 1), labels one-hot encoded to 14 classes.
    """
    h5 = h5py.File("dataset//data_notwhite.h5", "r")
    images = np.array(h5['X'])   # h5py Dataset -> plain ndarray
    labels = np.array(h5['Y'])
    n, _, _ = images.shape
    images = images.reshape(n, 40, 40, 1)
    print(type(images))
    X_train, X_test, y_train, y_test = train_test_split(
        images, labels, train_size=0.9, test_size=0.1, random_state=22)
    print(X_train.shape)
    # scale pixel values into [0, 1]
    X_train = X_train / 255.
    X_test = X_test / 255.
    # one-hot encode the labels (14 classes, matching the network's output layer)
    y_train = np_utils.to_categorical(y_train, num_classes=14)
    print(y_train.shape)
    y_test = np_utils.to_categorical(y_test, num_classes=14)
    print(y_test.shape)
    return X_train, X_test, y_train, y_test
def weight_variable(shape):
    """Create a weight Variable drawn from a truncated normal (stddev 0.1)."""
    tf.set_random_seed(1)  # fixed seed for reproducible initialization
    initial = tf.truncated_normal(shape, stddev=0.1)
    return tf.Variable(initial)
def bias_variable(shape):
    """Create a bias Variable initialized to all zeros."""
    zeros = tf.constant(0.0, shape=shape)
    return tf.Variable(zeros)
def conv2d(x, W):
    """2-D convolution with stride 1 and SAME padding (spatial size kept)."""
    unit_stride = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_stride, padding='SAME')
def max_pool_2x2(z):
    """2x2 max pooling with stride 2 — halves each spatial dimension."""
    window = [1, 2, 2, 1]
    return tf.nn.max_pool(z, ksize=window, strides=window, padding='SAME')
def random_mini_batches(X, Y, mini_batch_size=100, seed=0):
    """Split (X, Y) into a list of shuffled mini-batches.

    Arguments:
    X -- data of shape (m, ...), first axis indexes examples
    Y -- labels of shape (m, num_classes), rows aligned with X
    mini_batch_size -- size of each mini-batch; the last one may be smaller
    seed -- shuffle seed, so each epoch can reshuffle reproducibly

    Returns:
    mini_batches -- list of (mini_batch_X, mini_batch_Y) tuples covering
    all m examples exactly once.

    Fixes vs. original: the docstring was copied from an unrelated course
    exercise and described the wrong shapes/labels; the per-call
    "shuffled done" print (emitted every epoch) and a no-op reshape of
    shuffled_Y were removed; the complete/partial batch partition is one
    stride loop instead of floor-division plus a tail special case.
    """
    m = X.shape[0]  # number of examples
    np.random.seed(seed)
    # shuffle X and Y with the same permutation so rows stay aligned
    permutation = np.random.permutation(m)
    shuffled_X = X[permutation]
    shuffled_Y = Y[permutation]
    mini_batches = []
    for start in range(0, m, mini_batch_size):
        end = start + mini_batch_size  # slice end past m is safely clipped
        mini_batches.append((shuffled_X[start:end], shuffled_Y[start:end]))
    return mini_batches
def learning_curve(train_acc, test_acc, stride=20):
    """Plot training vs. testing accuracy; x-axis is sample-index * stride."""
    steps = np.arange(0, len(train_acc) * stride, stride)
    plt.figure()
    plt.plot(steps, train_acc, color='r', label='Training acc')
    plt.plot(steps, test_acc, color='b', label='Testing acc')
    plt.legend()
    plt.show()
def cnn_model(X_train, y_train, X_test, y_test, keep_prob, lamda, num_epochs = 450, minibatch_size = 100):
    """Build and train a LeNet-5-style CNN for gesture classification.

    Architecture: conv5x5(32) -> maxpool -> conv5x5(64) -> maxpool ->
    FC(512) + dropout -> softmax(14).

    Arguments:
    keep_prob -- dropout keep probability used during training
    lamda -- L2 regularization strength for the FC weights
    num_epochs, minibatch_size -- training schedule

    Side effects: checkpoints to `train_dir` whenever test accuracy beats
    the best so far, saves a final ckpt plus a frozen .pb graph for the
    Android app, and plots the learning curve.
    """
    # placeholders; node names matter because the frozen graph is consumed elsewhere
    X = tf.placeholder(tf.float32, [None, 40, 40 , 1], name="input_x")
    y = tf.placeholder(tf.float32, [None, 14], name="input_y")
    kp = tf.placeholder_with_default(1.0, shape=(), name="keep_prob")  # 1.0 (no dropout) unless fed
    lam = tf.placeholder(tf.float32, name="lamda")
    # conv1: 5x5, 1 -> 32 channels
    W_conv1 = weight_variable([5,5,1,32])
    b_conv1 = bias_variable([32])
    z1 = tf.nn.relu(conv2d(X, W_conv1) + b_conv1)
    maxpool1 = max_pool_2x2(z1)  # -> [?, 20, 20, 32]
    # conv2: 5x5, 32 -> 64 channels
    W_conv2 = weight_variable([5,5,32,64])
    b_conv2 = bias_variable([64])
    z2 = tf.nn.relu(conv2d(maxpool1, W_conv2) + b_conv2)
    maxpool2 = max_pool_2x2(z2)  # -> [?, 10, 10, 64]
    # (a third conv layer was tried and removed; per the author, two conv
    # layers with 100 hidden units and 20 epochs worked best)
    # fully connected layer 1
    W_fc1 = weight_variable([10*10*64, 512])
    b_fc1 = bias_variable([512])
    maxpool2_flat = tf.reshape(maxpool2, [-1, 10*10*64])
    z_fc1 = tf.nn.relu(tf.matmul(maxpool2_flat, W_fc1) + b_fc1)
    z_fc1_drop = tf.nn.dropout(z_fc1, keep_prob=kp)
    # softmax output layer
    # NOTE(review): 14 output classes, but get_file only generates labels
    # 0-11 — confirm whether the extra 2 classes are intentional.
    W_fc2 = weight_variable([512, 14])
    b_fc2 = bias_variable([14])
    z_fc2 = tf.add(tf.matmul(z_fc1_drop, W_fc2),b_fc2, name="outlayer")
    prob = tf.nn.softmax(z_fc2, name="probability")
    # cost: softmax cross-entropy + L2 regularization on the FC weights
    regularizer = tf.contrib.layers.l2_regularizer(lam)
    regularization = regularizer(W_fc1) + regularizer(W_fc2)
    cost = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=z_fc2)) + regularization
    train = tf.train.AdamOptimizer().minimize(cost)
    # node named "predict" so the frozen-graph export below can find it
    pred = tf.argmax(prob, 1, output_type="int32", name="predict")
    correct_prediction = tf.equal(pred, tf.argmax(y, 1, output_type='int32'))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    tf.set_random_seed(1)  # keep results reproducible
    seed = 0
    acc = 0.97             # checkpoint only when test accuracy beats this
    train_accs = []
    test_accs = []
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)
        step = 0
        for epoch in range(num_epochs):
            seed = seed + 1  # different shuffle each epoch
            epoch_cost = 0.
            num_minibatches = int(X_train.shape[0] / minibatch_size)
            minibatches = random_mini_batches(X_train, y_train, minibatch_size, seed)
            minibatchesTest = random_mini_batches(X_test,y_test,minibatch_size,seed)
            test_i = 0
            for minibatch in minibatches:
                (minibatch_X, minibatch_Y) = minibatch
                _, minibatch_cost = sess.run([train, cost], feed_dict={X: minibatch_X, y: minibatch_Y, kp: keep_prob, lam: lamda})
                epoch_cost += minibatch_cost / num_minibatches
                step = step+1
                # every 20 steps: evaluate one test mini-batch, maybe checkpoint
                if(step % 20 == 0):
                    (minibatchtest_X, minibatchtest_Y) = minibatchesTest[test_i]
                    test_i = test_i + 1
                    test_acc = accuracy.eval(feed_dict={X: minibatchtest_X, y: minibatchtest_Y, lam: lamda})
                    train_acc = accuracy.eval(feed_dict={X: minibatch_X, y: minibatch_Y, lam: lamda})
                    train_accs.append(train_acc)
                    test_accs.append(test_acc)
                    print("test accuracy", test_acc)
                    print("cost", minibatch_cost)
                    if test_acc>acc:
                        # new best test accuracy: save a checkpoint of all weights
                        acc = test_acc
                        saver = tf.train.Saver({'W_conv1':W_conv1, 'b_conv1':b_conv1, 'W_conv2':W_conv2, 'b_conv2':b_conv2,
                                'W_fc1':W_fc1, 'b_fc1':b_fc1, 'W_fc2':W_fc2, 'b_fc2':b_fc2})
                        checkpoint_path = os.path.join(train_dir, 'thing.ckpt')
                        saver.save(sess, checkpoint_path ,global_step=step)
            print("Cost after epoch %i: %f" % (epoch, epoch_cost))
            print(str((time.strftime('%Y-%m-%d %H:%M:%S'))))
        # final evaluation on the first 1000 train/test examples
        # (tensor.eval() and Session.run() are near-equivalent here)
        # NOTE(review): kp: 0.8 keeps dropout ACTIVE during this evaluation,
        # which biases the reported train accuracy — likely should be 1.0.
        train_acc = accuracy.eval(feed_dict={X: X_train[:1000], y: y_train[:1000], kp: 0.8, lam: lamda})
        print("train accuracy", train_acc)
        test_acc = accuracy.eval(feed_dict={X: X_test[:1000], y: y_test[:1000], lam: lamda})
        print("test accuracy", test_acc)
        # save the final model as a checkpoint
        saver = tf.train.Saver({'W_conv1':W_conv1, 'b_conv1':b_conv1, 'W_conv2':W_conv2, 'b_conv2':b_conv2,
                'W_fc1':W_fc1, 'b_fc1':b_fc1, 'W_fc2':W_fc2, 'b_fc2':b_fc2})
        saver.save(sess, "model//cnn_model.ckpt")
        # freeze the trained graph to a .pb file for use from Android Studio
        output_graph_def = graph_util.convert_variables_to_constants(sess, sess.graph_def, output_node_names=['predict'])
        with tf.gfile.FastGFile('model//digital_gesture.pb', mode='wb') as f:  # 'wb' = write binary
            f.write(output_graph_def.SerializeToString())
        learning_curve(train_accs,test_accs, 20)
if __name__ == "__main__":
    # Train the gesture CNN end to end, timestamping each phase.
    print("载入数据集: " + str((time.strftime('%Y-%m-%d %H:%M:%S'))))
    X_train, X_test, y_train, y_test = load_dataset()
    print("开始训练: " + str((time.strftime('%Y-%m-%d %H:%M:%S'))))
    # keep_prob=0.7, L2 lambda=0.01; only 2 epochs here (short demo run,
    # the default in cnn_model is 450)
    cnn_model(X_train, y_train, X_test, y_test, 0.7, 0.01, num_epochs=2, minibatch_size=80)
    print("训练结束: " + str((time.strftime('%Y-%m-%d %H:%M:%S'))))