最近在学习 TensorFlow 卷积神经网络,做了一个基于 OpenCV 的人脸识别小系统。
OpenCV 用来检测人脸区域并保存样本,
NumPy 用来辅助完成一些矩阵运算。
文件存放结构是这样的:
network:
    faces:
        成员1:
            face1
            face2
            。。。
        成员2:
            face1
            face2
            。。。
        。。3 :
        。。。:
    data_saved:
        ***.ckpt
        。。。
调用了下面这些库,其中 text 和 img 是自己编写的两个模块(代码见文末):
import tensorflow as tf
import cv2
import numpy as np
import text
import img
接下来上主代码
# Root folder of the project data. Save_path below and the img helper class
# both build their paths from it.
# NOTE(review): inside a raw string "\\" stays two characters, so this path
# contains doubled backslashes; presumably tolerated by Windows - confirm.
Root_path = r"C:\\Users\\96904\\Desktop\\network"
# Checkpoint file handed to the saver's save()/restore() calls.
Save_path = Root_path + "/data_saved/save_net.ckpt"
def weight_variable(shape):
    """Create a TensorFlow variable of the given shape for use as a weight.

    The initial values come from a truncated normal distribution with a
    standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))
def bias_variable(shape):
    """Create a TensorFlow variable of the given shape for use as a bias.

    Every element starts at the constant 0.1.
    """
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, W):
    """Convolve ``x`` with the filter ``W`` using unit strides and SAME padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')
def max_pool_2x2(x):
    """Max-pool ``x`` with a [1, 2, 2, 1] window, [1, 2, 2, 1] strides and SAME padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')
以上定义了文件存放路径,以及构建卷积网络所用的辅助函数(权重、偏置、卷积、池化)。
def _ask_int(prompt):
    """Prompt the user and return the typed integer, or None if it is not one."""
    try:
        return int(input(prompt))
    except ValueError:
        return None


def _build_network(num_classes):
    """Build the classifier in the default graph and return its handles.

    The network takes 36x36 single-channel images and stacks three
    conv(5x5)+ReLU+max-pool stages (32, 64, 128 filters), a 1024-unit fully
    connected layer with dropout, and a ``num_classes``-way output layer.
    Variables are created in the same order as before so existing checkpoints
    (which rely on TensorFlow's auto-generated variable names) still load.

    Returns:
        dict with the placeholders ("xs", "ys", "keep_prob") and the ops
        "cross_entropy", "train_step", "accuracy", "face_label" and "saver".
    """
    xs = tf.placeholder(dtype="float", shape=[None, 36, 36, 1])
    ys = tf.placeholder(dtype="float", shape=[None, num_classes])
    keep_prob = tf.placeholder("float")

    W_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])
    h_pool1 = max_pool_2x2(tf.nn.relu(conv2d(xs, W_conv1) + b_conv1))

    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    h_pool2 = max_pool_2x2(tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2))

    W_conv3 = weight_variable([5, 5, 64, 128])
    b_conv3 = bias_variable([128])
    h_pool3 = max_pool_2x2(tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3))

    # Three SAME-padded 2x2 poolings shrink 36 -> 18 -> 9 -> 5.
    W_fc1 = weight_variable([5 * 5 * 128, 1024])
    b_fc1 = bias_variable([1024])
    h_pool3_flat = tf.reshape(h_pool3, [-1, 5 * 5 * 128])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool3_flat, W_fc1) + b_fc1)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    W_fc2 = weight_variable([1024, num_classes])
    b_fc2 = bias_variable([num_classes])
    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

    cross_entropy = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=ys))
    train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)
    correct_prediction = tf.equal(tf.argmax(y_conv, 1), tf.argmax(ys, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))
    return {
        "xs": xs,
        "ys": ys,
        "keep_prob": keep_prob,
        "cross_entropy": cross_entropy,
        "train_step": train_step,
        "accuracy": accuracy,
        "face_label": tf.argmax(y_conv, 1),
        "saver": tf.train.Saver(),
    }


def _restore(sess, saver):
    """Load the checkpoint at Save_path into ``sess``; return True on success."""
    try:
        saver.restore(sess, Save_path)
    except (ValueError, tf.errors.OpError):
        # Missing, unreadable or shape-incompatible checkpoint.
        return False
    return True


def _train(net, data, x_batch, y_batch, x_test, y_test):
    """Run 141 training steps, checkpointing whenever the held-out loss improves."""
    xs, ys, keep_prob = net["xs"], net["ys"], net["keep_prob"]
    with tf.Session() as sess:
        if not _restore(sess, net["saver"]):
            sess.run(tf.global_variables_initializer())
            print("No such save file.")
        # Start from +inf: the loss is a sum over the test set, so a fixed
        # ceiling such as 999 could prevent any checkpoint from being written.
        best_loss = float("inf")
        for cnt in range(141):
            tem_batch_x, tem_batch_y = data.get_next_batch(cnt, 20, x_batch, y_batch)
            if cnt % 20 == 0:
                held_out = {xs: x_test, ys: y_test, keep_prob: 1.0}
                train_accuracy, loss = sess.run(
                    [net["accuracy"], net["cross_entropy"]], feed_dict=held_out)
                print("step %d, training accuracy %g" % (cnt, train_accuracy))
                if loss < best_loss:
                    best_loss = loss
                    net["saver"].save(sess, Save_path)
            # NOTE(review): keep_prob=0.1 drops 90% of the activations; kept
            # as in the original, but 0.5 is the more usual value - confirm.
            net["train_step"].run(
                session=sess,
                feed_dict={xs: tem_batch_x, ys: tem_batch_y, keep_prob: 0.1})


def _test_accuracy(net, data, x_batch, y_batch):
    """Print the accuracy of the saved model on every 20th batch (steps 0..200)."""
    with tf.Session() as sess:
        if not _restore(sess, net["saver"]):
            print("please train the module first.")
            return
        for cnt in range(0, 201, 20):
            tem_batch_x, tem_batch_y = data.get_next_batch(cnt, 20, x_batch, y_batch)
            train_accuracy = net["accuracy"].eval(
                session=sess,
                feed_dict={net["xs"]: tem_batch_x, net["ys"]: tem_batch_y,
                           net["keep_prob"]: 1.0})
            print("step %d, training accuracy %g" % (cnt, train_accuracy))


def _retrain_on_feedback(sess, net, faces, names, prompt):
    """Ask who the user is and run one training step on the captured faces."""
    faces_y = input(prompt)
    label = np.zeros(len(names))
    for key in names:
        if names[key] == faces_y:
            label[key - 1] = 1  # names is keyed from 1, labels from 0
            break
    else:
        print("Name error, please try again, 嘤嘤嘤(这里还没想好怎么重新输入)")
        return
    print(label)
    # One label row per captured face; the camera may deliver fewer than 20.
    labels = np.tile(label, (len(faces), 1))
    print("I'm trying to do batter!")
    net["train_step"].run(
        session=sess,
        feed_dict={net["xs"]: faces, net["ys"]: labels, net["keep_prob"]: 0.1})
    net["saver"].save(sess, Save_path)


def _recognize(net, data, names):
    """Capture faces from the camera, vote on the identity and learn from feedback."""
    faces = data.get_tem_faces()
    if len(faces) == 0:
        # Nothing usable was captured; classifying or training would fail.
        print("No face captured, please try again.")
        return
    with tf.Session() as sess:
        if not _restore(sess, net["saver"]):
            print("please train the module first.")
            return
        predictions = sess.run(
            net["face_label"],
            feed_dict={net["xs"]: faces, net["keep_prob"]: 1.0})
        # Share of the captured frames assigned to each class.
        votes = np.bincount(predictions, minlength=len(names)) / len(predictions)
        you_are = 0
        for index, share in enumerate(votes):
            print(share)
            if share >= 0.8:
                you_are = names[index + 1]
        if you_are:
            print("You are", you_are)
            result = input("Am I right?[y/n]")
            if result == 'y':
                print("Ha ha, I think so (〃'▽'〃) ")
            elif result == 'n':
                # Wrong guess: retrain once on this capture.
                _retrain_on_feedback(sess, net, faces, names,
                                     "Please tell me who you are:")
        else:
            print("I don't know who you are^(>=>)^")
            _retrain_on_feedback(
                sess, net, faces, names,
                "Sorry I forget who you are...Please tell me who you are:")


# The helper only stores the root path, so one instance serves the whole session.
a = img.imgs(Root_path)

while True:
    start = _ask_int("输入1开始读取面部特征\n输入2开始训练\n输入3实时识别\n输入0退出系统:")
    if start is None:
        # Invalid input: ask again instead of falling through to the exit branch.
        print("输入错误,请重新输入")
        continue
    if start == 1:  # collect samples
        a.get_faces()
    elif start == 2:  # train / evaluate / recognize
        mode = _ask_int("输入1开始训练\n输入2开始测试精度\n输入3开始识别\n输入0返回:")
        if mode not in (1, 2, 3):
            if mode != 0:
                print("输入错误,请重新输入")
            continue  # back to the main menu before any graph is built
        x_batch, y_batch, x_test, y_test, names = a.get_batchs()
        x_batch, y_batch = a.shuffle_batch(x_batch, y_batch)
        x_test, y_test = a.shuffle_batch(x_test, y_test)
        try:
            net = _build_network(len(names))
            if mode == 1:
                _train(net, a, x_batch, y_batch, x_test, y_test)
            elif mode == 2:
                _test_accuracy(net, a, x_batch, y_batch)
            else:
                _recognize(net, a, names)
        finally:
            # Always clear the graph, including on the early-exit paths, so
            # the next run creates variables with the same auto-generated names.
            tf.reset_default_graph()
    elif start == 3:
        text.real_time()
    elif start == 0:
        print("Bye~")
        break
    else:
        print("输入错误,请重新输入")
这里是两个自己写的文件
文件 img.py
import cv2
import pickle
import os
import random
import tensorflow as tf
import numpy as np
# Fraction of each member's images that get_batchs places in the training
# split; the remaining images form the test split.
test_range = 0.98
class imgs:
    """Camera capture and dataset helpers for the face-recognition demo.

    Samples live under ``<Root_path>/faces/<member>/face<N>.jpg``. Class
    labels are one-hot vectors; ``names`` maps 1-based class numbers to member
    folder names, so class number ``k`` corresponds to label index ``k - 1``.
    """

    # Default Haar cascade file (machine-specific path kept from the original
    # code); pass ``cascade_path`` to the constructor to override it.
    CASCADE_PATH = r"C:\\Users\\96904\\Desktop\\OpenCV\\face1-lvdaidai\\haarcascades\\haarcascade_frontalface_default.xml"

    def __init__(self, Root_path, cascade_path=None):
        """Remember the data root and, optionally, a custom cascade file."""
        self.Root_path = Root_path
        self.cascade_path = self.CASCADE_PATH if cascade_path is None else cascade_path

    def _read_gray_faces(self, cap, detector):
        """Grab one frame; return (gray_frame, detected_faces) or (None, ()) on failure."""
        ret, frame = cap.read()
        if not ret or frame is None:
            return None, ()
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
        faces = detector.detectMultiScale(
            gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
        return gray, faces

    def get_faces(self):
        """Interactively record face samples for one member.

        Asks for the member name and how many samples to record, then saves
        100x100 grayscale crops as ``face<N>.jpg`` in the member's folder.
        The index of the last saved image is kept in ``data.pkl`` so a later
        session continues numbering after it.

        Raises:
            ValueError: if the requested sample count is not an integer.
        """
        print("正在初始化……")
        whose_name = input("请输入你的姓名")
        data_range = int(input("请输入读取特征数量"))
        member_dir = os.path.join(self.Root_path, "faces", whose_name)
        os.makedirs(member_dir, exist_ok=True)
        data_path = os.path.join(member_dir, "data.pkl")

        # NOTE: pickle must only be used on files this program wrote itself.
        try:
            with open(data_path, 'rb') as pkl_file:
                saved = int(pickle.load(pkl_file))
            print(saved)
        except (OSError, EOFError, pickle.UnpicklingError, TypeError, ValueError):
            saved = 0  # first session for this member, or unreadable counter

        # Continue after the last saved image instead of overwriting it.
        next_index = saved + 1
        last_index = saved + data_range
        detector = cv2.CascadeClassifier(self.cascade_path)
        cap = cv2.VideoCapture(0)
        try:
            while next_index <= last_index:
                gray, faces = self._read_gray_faces(cap, detector)
                if gray is None:
                    print("Error: failed to read a frame from the camera.")
                    break
                if len(faces) > 1:
                    print("只允许一个人录入")
                    continue
                for (x, y, w, h) in faces:
                    # No rectangle is drawn on the frame: the crop is a view
                    # of it, so drawing would put a border into the sample.
                    whose_face = gray[y:y + h, x:x + w]
                    print(whose_face.shape)
                    if whose_face.shape[0] <= 100:
                        continue  # face too small to be a useful sample
                    whose_face = cv2.resize(whose_face, (100, 100))
                    cv2.imshow("face", whose_face)
                    save_path = os.path.join(
                        member_dir, "face" + str(next_index) + ".jpg")
                    # Only count the sample when it really reached the disk.
                    if cv2.imwrite(save_path, whose_face):
                        next_index += 1
                    cv2.waitKey(1)
        finally:
            cap.release()
            cv2.destroyAllWindows()
        with open(data_path, 'wb') as output:
            pickle.dump(next_index - 1, output)

    def get_tem_faces(self):
        """Capture up to 20 frames and return the usable faces for recognition.

        Returns:
            Array of shape (n, 36, 36, 1) scaled to [-0.5, 0.5]; ``n`` may be
            anything from 0 to 20 because frames without exactly one
            sufficiently large face are skipped.
        """
        print("正在初始化……")
        face_tem = []
        detector = cv2.CascadeClassifier(self.cascade_path)
        cap = cv2.VideoCapture(0)
        try:
            for _ in range(20):
                gray, faces = self._read_gray_faces(cap, detector)
                if gray is None:
                    print("Error: failed to read a frame from the camera.")
                    break
                if len(faces) != 1:
                    print("只允许一个人录入")
                    continue
                x, y, w, h = faces[0]
                whose_face = gray[y:y + h, x:x + w]
                print(whose_face.shape)
                if whose_face.shape[0] > 100:
                    face_tem.append(cv2.resize(whose_face, (36, 36)))
                    cv2.waitKey(1)
        finally:
            cap.release()
            cv2.destroyAllWindows()
        face_tem = np.array(face_tem, dtype=np.uint8).reshape(-1, 36, 36, 1)
        return face_tem * (1. / 255) - 0.5

    def get_batchs(self):
        """Load every member's images and split them into train and test sets.

        Returns:
            (train_x, train_y, test_x, test_y, names) where ``train_x`` holds
            raw 36x36 uint8 images (normalised later by get_next_batch),
            ``test_x`` is already reshaped to (n, 36, 36, 1) and scaled to
            [-0.5, 0.5], the ``*_y`` arrays are one-hot labels, and ``names``
            maps 1-based class numbers to folder names.
        """
        names = {}
        members_paths = {}
        i = 1
        for root, dirs, files in os.walk(os.path.join(self.Root_path, "faces")):
            for name in dirs:
                names[i] = name
                members_paths[i] = os.path.join(root, name)
                i += 1
        num_classes = len(names)

        train_x, train_y, test_x, test_y = [], [], [], []
        for index in range(1, num_classes + 1):
            members_path = members_paths[index] + "/"
            print(members_path)
            one_hot = np.zeros(num_classes, dtype=int)
            one_hot[index - 1] = 1
            samples = []
            for root, dirs, files in os.walk(members_path):
                for name in files:
                    path = os.path.join(root, name)
                    if os.path.splitext(path)[1] != ".jpg":
                        continue
                    image = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
                    if image is None:
                        continue  # unreadable or corrupt file
                    # Shrink to the network's input size.
                    samples.append(cv2.resize(image, (36, 36)))
            # Accumulate per member; an empty member simply contributes nothing.
            split = int(len(samples) * test_range)
            train_x.extend(samples[:split])
            train_y.extend([one_hot] * split)
            test_x.extend(samples[split:])
            test_y.extend([one_hot] * (len(samples) - split))

        def as_array(items, tail, dtype):
            # Keep a well-defined shape even when there are no items.
            if not items:
                return np.zeros((0,) + tail, dtype=dtype)
            return np.array(items, dtype=dtype)

        train_x = as_array(train_x, (36, 36), np.uint8)
        train_y = as_array(train_y, (num_classes,), int)
        test_x = as_array(test_x, (36, 36), np.uint8).reshape(-1, 36, 36, 1)
        test_x = test_x * (1. / 255) - 0.5
        test_y = as_array(test_y, (num_classes,), int)
        return train_x, train_y, test_x, test_y, names

    def shuffle_batch(self, batch_x, batch_y):
        """Shuffle samples and labels in place with the same permutation.

        The global NumPy random state is replayed for the second shuffle
        rather than reseeded, so both sequences get the same order without
        collapsing the generator onto a small set of seeds. Inputs of
        different lengths are returned untouched after printing an error.
        """
        if len(batch_x) != len(batch_y):
            print("Error:batch_x and batch_y must has the same length.")
            return batch_x, batch_y
        if len(batch_x) > 1:
            state = np.random.get_state()
            np.random.shuffle(batch_x)
            np.random.set_state(state)
            np.random.shuffle(batch_y)
        return batch_x, batch_y

    def get_next_batch(self, cnt, batch_size, batch_x, batch_y):
        """Return mini-batch number ``cnt``, wrapping around the dataset.

        Args:
            cnt: zero-based batch counter; may exceed the number of batches
                in one pass, in which case the data is cycled.
            batch_size: number of samples per batch.
            batch_x: raw images, indexable as (n, 36, 36).
            batch_y: labels aligned with ``batch_x``.

        Returns:
            (images, labels) with images shaped (batch_size, 36, 36, 1) and
            scaled to [-0.5, 0.5].
        """
        batch_x = np.asarray(batch_x)
        batch_y = np.asarray(batch_y)
        total = len(batch_x)
        if total == 0 or batch_size <= 0:
            return np.zeros((0, 36, 36, 1)), batch_y[:0]
        # Index modulo the dataset size so every batch has exactly batch_size
        # samples, also after the first pass over the data.
        start = (cnt * batch_size) % total
        picks = (start + np.arange(batch_size)) % total
        img = batch_x[picks].reshape(-1, 36, 36, 1)
        img = img * (1. / 255) - 0.5
        lable_y = batch_y[picks]
        return img, lable_y
文件text.py 用来做实时识别
import cv2
import os
import tensorflow as tf
import numpy as np
def real_time():
    """Recognise the face in the webcam stream until the user presses "q".

    Rebuilds the classifier graph (same layer and variable order as the
    training script, so the checkpoint's auto-generated variable names
    match), restores the saved weights, and labels every frame that contains
    exactly one detected face. The session, camera, windows and default graph
    are released on every exit path.
    """
    Root_path = r"C:\\Users\\96904\\Desktop\\network"
    Save_path = Root_path + "/data_saved/save_net.ckpt"
    cascade_path = r"C:\\Users\\96904\\Desktop\\OpenCV\\face1-lvdaidai\\haarcascades\\haarcascade_frontalface_default.xml"

    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(shape):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def conv2d(x, W):
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def max_pool_2x2(x):
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1], padding='SAME')

    # Class numbers start at 1, in the order os.walk reports the folders.
    names = {}
    i = 1
    for root, dirs, files in os.walk(Root_path + r"\\faces"):
        for name in dirs:
            names[i] = name
            i = i + 1

    xs = tf.placeholder(dtype="float", shape=[None, 36, 36, 1])
    keep_prob = tf.placeholder("float")
    W_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])
    h_pool1 = max_pool_2x2(tf.nn.relu(conv2d(xs, W_conv1) + b_conv1))
    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    h_pool2 = max_pool_2x2(tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2))
    W_conv3 = weight_variable([5, 5, 64, 128])
    b_conv3 = bias_variable([128])
    h_pool3 = max_pool_2x2(tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3))
    W_fc1 = weight_variable([5 * 5 * 128, 1024])
    b_fc1 = bias_variable([1024])
    h_pool3_flat = tf.reshape(h_pool3, [-1, 5 * 5 * 128])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool3_flat, W_fc1) + b_fc1)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)
    W_fc2 = weight_variable([1024, len(names)])
    b_fc2 = bias_variable([len(names)])
    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2
    saver = tf.train.Saver()
    face_label = tf.argmax(y_conv, 1)

    cap = cv2.VideoCapture(0)
    face = cv2.CascadeClassifier(cascade_path)
    sess = tf.Session()
    try:
        try:
            saver.restore(sess, Save_path)
        except (ValueError, tf.errors.OpError):
            # Missing, unreadable or shape-incompatible checkpoint.
            print("please train the module first.")
            return
        while True:
            ret, img = cap.read()
            if not ret or img is None:
                print("Error: failed to read a frame from the camera.")
                break
            img_g = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            faces = face.detectMultiScale(
                img_g, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
            if len(faces) == 1:
                x, y, w, h = faces[0]
                whose_face = img_g[y:y + h, x:x + w]
                cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
                print(whose_face.shape)
                face_tem = cv2.resize(whose_face, (36, 36)).reshape(-1, 36, 36, 1)
                face_tem = face_tem * (1. / 255) - 0.5
                # Classify only the face found in this frame, so no stale
                # label from an earlier frame is ever drawn.
                face_labels = sess.run(
                    face_label, feed_dict={xs: face_tem, keep_prob: 1.0})
                you_are = names[face_labels[0] + 1]
                cv2.putText(img, you_are, (x, y),
                            cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255))
            else:
                print("只允许一个人录入")
            cv2.imshow("test", img)
            # waitKey returns an integer key code, never a string.
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        sess.close()
        cap.release()
        cv2.destroyAllWindows()
        tf.reset_default_graph()
现在还是存在问题,样本数量较少,模型泛化能力较差,接下来我会对读取图像样本做处理以增加训练的泛化能力。
还有个问题,mode == 3 的时候如果读取图像错误就会报错,接下来还需要解决这个问题。