初学tensorflow 基于卷积神经网络（CNN）和 opencv 的人脸识别

最新推荐文章于 2021-07-05 11:49:59 发布

qq_41775810

最新推荐文章于 2021-07-05 11:49:59 发布

阅读量5.6k

点赞数 5

分类专栏：tensorflow学习笔记　　文章标签：tensorflow 深度学习 CNN

本文链接：https://blog.csdn.net/qq_41775810/article/details/82596989

版权

tensorflow学习笔记专栏收录该内容

1 篇文章 0 订阅

订阅专栏

最近在学习 tensorflow 卷积神经网络，做了一个基于 opencv 的小系统来识别人脸。
opencv 用来检测人脸范围以及采集、储存样本，
numpy 用来辅助完成一些矩阵运算。

文件存放结构是这样的（data_saved 与 faces 同级，都放在 network 目录下）：

network：
		faces：
				成员1：
						face1.jpg
						face2.jpg
						。。。
						data.pkl
				成员2：
						face1.jpg
						face2.jpg
						。。。
						data.pkl
				成员3：
				。。。：
		data_saved：
				***.ckpt
				。。。

调用了这些库，其中有两个自己编写的文件

import tensorflow as tf
import cv2
import numpy as np
import text
import img

接下来上主代码

# Root directory of the project data; the code below and img.py look for the
# "faces" and "data_saved" sub-directories underneath it.
# NOTE: this is a raw string, so every "\\" is two literal backslash characters.
Root_path = r"C:\\Users\\96904\\Desktop\\network"
# Checkpoint path handed to tf.train.Saver.save()/restore().
Save_path = Root_path + "/data_saved/save_net.ckpt"
def weight_variable(shape):
    """Return a new tf.Variable of the given shape.

    The initial values are drawn from a truncated normal distribution with a
    standard deviation of 0.1.
    """
    return tf.Variable(tf.truncated_normal(shape, stddev=0.1))

def bias_variable(shape):
    """Return a new tf.Variable of the given shape with every entry set to 0.1."""
    return tf.Variable(tf.constant(0.1, shape=shape))
def conv2d(x, W):
    """Convolve x with the filter W using stride 1 and 'SAME' padding."""
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x):
    """Max-pool x over 2x2 windows with stride 2 and 'SAME' padding."""
    window = [1, 2, 2, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME')

上面这段代码定义了文件存放路径，以及构建卷积网络所用的辅助函数（权重、偏置、卷积和池化）。


# ---------------------------------------------------------------------------
# Interactive entry point.
#
# Menu: 1 = capture face samples, 2 = train / evaluate / recognise with the
# CNN, 3 = real-time recognition, 0 = quit.  The helpers below are private to
# this script; the loop at the bottom runs as soon as the file is executed.
# ---------------------------------------------------------------------------
_BATCH_SIZE = 20        # samples per training / evaluation batch
_TRAIN_STEPS = 141      # optimisation steps per training run
_EVAL_STEPS = 201       # range of the batch counter walked when reporting accuracy
_REPORT_EVERY = 20      # evaluate / print every this many steps
# NOTE(review): a keep probability of 0.1 keeps only 10% of the dense
# activations on each training step.  The value is carried over unchanged from
# the original code -- confirm that it is intended.
_TRAIN_KEEP_PROB = 0.1
_VOTE_THRESHOLD = 0.8   # share of captured frames that must agree on one member


def _build_network(num_classes):
    """Describe the CNN on the default graph and return its handles as a dict.

    Variables are created in a fixed order (three conv layers, then two dense
    layers, weight before bias) and without explicit names, so the names
    TensorFlow generates for them depend only on that order.  Keep the order
    stable, otherwise previously saved checkpoints stop matching.
    """
    xs = tf.placeholder(dtype="float", shape=[None, 36, 36, 1])
    ys = tf.placeholder(dtype="float", shape=[None, num_classes])
    keep_prob = tf.placeholder("float")

    # Three conv(5x5) + ReLU + 2x2 max-pool stages.  With 'SAME' padding the
    # spatial size shrinks 36 -> 18 -> 9 -> 5, hence the 5 * 5 * 128 below.
    hidden = xs
    for in_channels, out_channels in ((1, 32), (32, 64), (64, 128)):
        kernel = weight_variable([5, 5, in_channels, out_channels])
        bias = bias_variable([out_channels])
        hidden = max_pool_2x2(tf.nn.relu(conv2d(hidden, kernel) + bias))

    W_fc1 = weight_variable([5 * 5 * 128, 1024])
    b_fc1 = bias_variable([1024])
    flat = tf.reshape(hidden, [-1, 5 * 5 * 128])
    h_fc1 = tf.nn.relu(tf.matmul(flat, W_fc1) + b_fc1)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    W_fc2 = weight_variable([1024, num_classes])
    b_fc2 = bias_variable([num_classes])
    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

    cross_entropy = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits(logits=y_conv, labels=ys))
    train_step = tf.train.AdamOptimizer(0.001).minimize(cross_entropy)
    prediction = tf.argmax(y_conv, 1)
    correct_prediction = tf.equal(prediction, tf.argmax(ys, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, "float"))

    return {
        "xs": xs,
        "ys": ys,
        "keep_prob": keep_prob,
        "cross_entropy": cross_entropy,
        "train_step": train_step,
        "prediction": prediction,
        "accuracy": accuracy,
        # Created last so that it also covers the optimizer's own variables.
        "saver": tf.train.Saver(),
    }


def _restore_checkpoint(net, sess):
    """Load Save_path into sess; return True on success, False otherwise."""
    try:
        net["saver"].restore(sess, Save_path)
    except (ValueError, tf.errors.OpError):
        return False
    return True


def _train(sess, net, a, x_batch, y_batch, x_test, y_test):
    """Run one training pass, saving whenever the test-split loss improves."""
    if not _restore_checkpoint(net, sess):
        # No usable checkpoint yet: start from freshly initialised variables.
        sess.run(tf.global_variables_initializer())
        print("No such save file.")

    eval_feed = {net["xs"]: x_test, net["ys"]: y_test, net["keep_prob"]: 1.0}
    # The loss is a sum over the whole test split, so it has no fixed upper
    # bound; start from infinity so the first evaluation always gets saved.
    best_loss = float("inf")

    for cnt in range(_TRAIN_STEPS):
        tem_batch_x, tem_batch_y = a.get_next_batch(cnt, _BATCH_SIZE, x_batch, y_batch)
        if cnt % _REPORT_EVERY == 0:
            test_accuracy, loss = sess.run(
                [net["accuracy"], net["cross_entropy"]], feed_dict=eval_feed)
            # The value is measured on the held-out split, so label it as such.
            print("step %d, test accuracy %g" % (cnt, test_accuracy))
            if loss < best_loss:
                best_loss = loss
                net["saver"].save(sess, Save_path)
        net["train_step"].run(
            session=sess,
            feed_dict={net["xs"]: tem_batch_x, net["ys"]: tem_batch_y,
                       net["keep_prob"]: _TRAIN_KEEP_PROB})


def _report_accuracy(sess, net, a, x_batch, y_batch):
    """Print the accuracy of the saved model on every 20th training batch."""
    if not _restore_checkpoint(net, sess):
        print("please train the module first.")
        return
    # Only the reported steps need a batch, so skip the ones in between.
    for cnt in range(0, _EVAL_STEPS, _REPORT_EVERY):
        tem_batch_x, tem_batch_y = a.get_next_batch(cnt, _BATCH_SIZE, x_batch, y_batch)
        train_accuracy = sess.run(
            net["accuracy"],
            feed_dict={net["xs"]: tem_batch_x, net["ys"]: tem_batch_y,
                       net["keep_prob"]: 1.0})
        print("step %d, training accuracy %g" % (cnt, train_accuracy))


def _retrain_on(sess, net, names, faces, faces_y):
    """Take one training step on `faces`, all labelled as member `faces_y`."""
    y_ = np.zeros(len(names))
    for key, member in names.items():
        if member == faces_y:
            y_[key - 1] = 1     # names is keyed from 1, label columns from 0
            break
    else:
        print("Name error, please try again, 嘤嘤嘤（这里还没想好怎么重新输入）")
        return

    # One label row per captured face: the camera may deliver fewer than 20.
    labels = np.tile(y_, (len(faces), 1))
    print("I'm trying to do batter!")
    net["train_step"].run(
        session=sess,
        feed_dict={net["xs"]: faces, net["ys"]: labels,
                   net["keep_prob"]: _TRAIN_KEEP_PROB})
    net["saver"].save(sess, Save_path)


def _recognise(sess, net, a, names):
    """Capture faces from the camera, vote on who it is and learn from feedback."""
    faces = a.get_tem_faces()
    if not _restore_checkpoint(net, sess):
        print("please train the module first.")
        return
    if len(faces) == 0:
        # Nothing to classify; avoids feeding an empty batch and dividing by 0.
        print("No face captured, please try again.")
        return

    face_label = sess.run(
        net["prediction"], feed_dict={net["xs"]: faces, net["keep_prob"]: 1})
    # Share of the captured frames that voted for each member.
    name_labels = np.bincount(face_label, minlength=len(names)) / len(face_label)

    you_are = 0
    for index, share in enumerate(name_labels):
        print(share)
        if share >= _VOTE_THRESHOLD:
            you_are = names[index + 1]

    if you_are:
        print("You are", you_are)
        result = input("Am I right?[y/n]")
        if result == 'y':
            print("Ha ha, I think so (〃'▽'〃) ")
            return
        if result != 'n':
            return
        # Wrong guess: learn from this capture with the name the user supplies.
        prompt = "Please tell me who you are:"
    else:
        print("I don't know who you are^(>=>)^")
        prompt = "Sorry I forget who you are...Please tell me who you are:"

    _retrain_on(sess, net, names, faces, input(prompt))


def _network_menu(a):
    """Handle main-menu entry 2: train, measure accuracy or recognise."""
    try:
        mode = int(input("输入1开始训练\n输入2开始测试精度\n输入3开始识别\n输入0返回："))
    except ValueError:
        print("输入错误，请重新输入")
        return
    if mode not in (1, 2, 3):
        # 0 means "back"; anything else is ignored before a graph gets built.
        return

    x_batch, y_batch, x_test, y_test, names = a.get_batchs()
    x_batch, y_batch = a.shuffle_batch(x_batch, y_batch)
    x_test, y_test = a.shuffle_batch(x_test, y_test)

    try:
        net = _build_network(len(names))
        with tf.Session() as sess:
            if mode == 1:
                _train(sess, net, a, x_batch, y_batch, x_test, y_test)
            elif mode == 2:
                _report_accuracy(sess, net, a, x_batch, y_batch)
            else:
                _recognise(sess, net, a, names)
    finally:
        # Always start the next run from an empty graph, also after an early
        # return or an error; otherwise the next graph's variables get
        # different generated names and no longer match the checkpoint.
        tf.reset_default_graph()


a = img.imgs(Root_path)
while True:
    try:
        start = int(input("输入1开始读取面部特征\n输入2开始训练\n输入3实时识别\n输入0退出系统："))
    except ValueError:
        # Ask again instead of falling through to the "0 = quit" branch.
        print("输入错误，请重新输入")
        continue
    except EOFError:
        # No more input available: leave instead of looping forever.
        print("Bye~")
        break

    if start == 1:      # capture samples
        a.get_faces()
    elif start == 2:    # train / evaluate / recognise
        _network_menu(a)
    elif start == 3:    # real-time recognition
        text.real_time()
    elif start == 0:
        print("Bye~")
        break
    else:
        print("输入错误，请重新输入")

这里是两个自己写的文件
文件 img.py

import cv2
import pickle
import os
import random
import tensorflow as tf
import numpy as np
# Despite its name, this is the share of each member's images that get_batchs()
# places in the TRAINING split; the remaining images form the test split.
test_range = 0.98
class imgs:
    """Capture face samples with OpenCV and serve them as batches for the CNN.

    Samples live under ``<Root_path>\\faces\\<member>\\face<N>.jpg``; each member
    folder also holds a ``data.pkl`` file with the index of the last sample
    written, so that later capture sessions append instead of starting over.
    """

    # Haar cascade used to detect frontal faces (machine-specific location).
    _CASCADE_PATH = r"C:\\Users\\96904\\Desktop\\OpenCV\\face1-lvdaidai\\haarcascades\\haarcascade_frontalface_default.xml"
    # Detected faces must be taller than this many pixels to be used.
    _MIN_FACE_HEIGHT = 100
    # Side length, in pixels, of the images fed to the network.
    _NET_SIDE = 36

    def __init__(self,Root_path):
        """Remember the project root directory that contains ``faces``."""
        self.Root_path=Root_path

    # ------------------------------------------------------------------ helpers

    @staticmethod
    def _single_face(detector, gray):
        """Return a copy of the only face found in `gray`, or None.

        The crop is copied so that it owns its pixels and nothing that is later
        drawn on, or done to, the frame can leak into a stored sample.
        """
        found = detector.detectMultiScale(gray, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))
        if len(found) != 1:
            print("只允许一个人录入")
            return None
        x, y, w, h = found[0]
        crop = gray[y:y + h, x:x + w].copy()
        print(crop.shape)
        return crop

    @staticmethod
    def _load_counter(data_path):
        """Return the sample index stored in `data_path`, or None if unusable."""
        try:
            with open(data_path, 'rb') as pkl_file:
                # NOTE: unpickling is only acceptable because this file is
                # written by get_faces() itself; never point it at foreign data.
                return int(pickle.load(pkl_file))
        except (OSError, EOFError, pickle.UnpicklingError, TypeError, ValueError):
            return None

    @classmethod
    def _to_network_input(cls, images):
        """Stack grayscale images as (N, 36, 36, 1) floats scaled to [-0.5, 0.5]."""
        batch = np.array(images)
        batch = batch.reshape(-1, cls._NET_SIDE, cls._NET_SIDE, 1)
        return batch * (1. / 255) - 0.5

    @classmethod
    def _read_member_images(cls, members_path):
        """Return every readable .jpg below `members_path` as 36x36 grayscale."""
        images = []
        for root, dirs, files in os.walk(members_path):
            for name in files:
                path = os.path.join(root, name)
                if os.path.splitext(path)[1] != ".jpg":
                    continue
                picture = cv2.imread(path, cv2.IMREAD_GRAYSCALE)
                if picture is None:
                    # cv2.imread signals an unreadable file by returning None.
                    print("无法读取图像，已跳过:", path)
                    continue
                # Shrink to the network input size to keep memory use low.
                images.append(cv2.resize(picture, (cls._NET_SIDE, cls._NET_SIDE)))
        return images

    # ------------------------------------------------------------------ capture

    def get_faces(self):       # capture training samples
        """Interactively record face samples for one member from the camera.

        Asks for the member name and the number of samples, then stores each
        accepted face as a 100x100 grayscale ``face<N>.jpg``.  Numbering
        continues after the index saved by a previous session.
        """
        print("正在初始化……")
        cap = cv2.VideoCapture(0)
        face = cv2.CascadeClassifier(self._CASCADE_PATH)
        try:
            whose_name = input("请输入你的姓名")
            data_range = int(input("请输入读取特征数量"))
            member_dir = self.Root_path + r"\\faces\\" + whose_name
            data_path = member_dir + r"\\data.pkl"
            # cv2.imwrite does not create missing folders.
            os.makedirs(member_dir, exist_ok=True)

            n = self._load_counter(data_path)
            if n is None:
                n = 0
            else:
                print(n)

            # Continue AFTER the last stored sample so it is not overwritten,
            # and record exactly `data_range` new samples.
            i = n + 1
            while i <= n + data_range:
                ret, img = cap.read()
                if not ret:
                    print("无法读取摄像头画面")
                    break
                img_g = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                whose_face = self._single_face(face, img_g)
                if whose_face is None or whose_face.shape[0] <= self._MIN_FACE_HEIGHT:
                    continue
                save_path = member_dir + r"\\face" + str(i) + ".jpg"
                try:
                    whose_face = cv2.resize(whose_face, (100, 100))
                    cv2.imshow("face", whose_face)
                    written = cv2.imwrite(save_path, whose_face)
                except cv2.error:
                    # Best effort, as before: drop this frame and try the next.
                    continue
                if not written:
                    # imwrite reports failure through its return value; stop
                    # rather than counting samples that were never stored.
                    print("无法保存图像:", save_path)
                    break
                i += 1
                cv2.waitKey(1)

            # i is the next free index, so i - 1 is the last sample written.
            with open(data_path, 'wb') as output:
                pickle.dump(i - 1, output)
        finally:
            cap.release()
            cv2.destroyAllWindows()

    def get_tem_faces(self):        # capture samples for recognition (mode == 3)
        """Grab up to 20 frames and return the usable faces as network input.

        Returns:
            Float array of shape (k, 36, 36, 1) with 0 <= k <= 20; frames
            without exactly one sufficiently large face are skipped.
        """
        print("正在初始化……")
        face_tem = []
        cap = cv2.VideoCapture(0)
        face = cv2.CascadeClassifier(self._CASCADE_PATH)
        try:
            for _ in range(20):
                ret, img = cap.read()
                if not ret:
                    print("无法读取摄像头画面")
                    break
                img_g = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
                whose_face = self._single_face(face, img_g)
                if whose_face is not None and whose_face.shape[0] > self._MIN_FACE_HEIGHT:
                    face_tem.append(cv2.resize(whose_face, (self._NET_SIDE, self._NET_SIDE)))
                    cv2.waitKey(1)
        finally:
            cap.release()
            cv2.destroyAllWindows()
        return self._to_network_input(face_tem)

    # ------------------------------------------------------------------ batches

    def get_batchs(self):          # load all stored samples into memory
        """Load every member's samples and split them into train and test sets.

        Returns:
            (train_x, train_y, test_x, test_y, names) where
            train_x is a uint8 array (N, 36, 36) of raw pixels (scaled later by
            get_next_batch), train_y / test_y are one-hot int arrays, test_x is
            a float array (M, 36, 36, 1) already scaled to [-0.5, 0.5], and
            names maps the 1-based member index to the member folder name.
        """
        names = {}
        members_paths = {}
        i = 1
        # NOTE: the label index of a member is its position in this walk.  Any
        # other code that maps predictions back to names must enumerate the
        # folders the same way.
        for root, dirs, files in os.walk(self.Root_path + r"\\faces"):
            for name in dirs:
                names[i] = name
                members_paths[i] = os.path.join(root, name)
                i = i + 1
        member_count = i - 1

        train_x, train_y, test_x, test_y = [], [], [], []
        for j in range(1, member_count + 1):
            members_path = members_paths[j] + "/"
            print(members_path)
            feed_list = np.zeros(member_count, dtype=int)
            feed_list[j - 1] = 1

            _x = self._read_member_images(members_path)
            split = int(len(_x) * test_range)
            train_x.extend(_x[:split])
            train_y.extend([feed_list] * split)
            test_x.extend(_x[split:])
            test_y.extend([feed_list] * (len(_x) - split))

        side = self._NET_SIDE
        # Explicit reshapes keep the shapes well-defined when a split is empty.
        train_x = np.array(train_x, dtype=np.uint8).reshape(len(train_x), side, side)
        train_y = np.array(train_y, dtype=int).reshape(len(train_y), member_count)
        test_y = np.array(test_y, dtype=int).reshape(len(test_y), member_count)
        test_x = self._to_network_input(test_x)

        return train_x, train_y, test_x, test_y, names

    def shuffle_batch(self, batch_x,batch_y):     # shuffle samples and labels together
        """Shuffle `batch_x` and `batch_y` in place with one shared permutation.

        Both sequences are returned (the same objects that were passed in).  If
        their lengths differ, a message is printed and they are left untouched.
        """
        count = len(batch_x)
        if count != len(batch_y):
            print("Error:batch_x and batch_y must has the same length.")
            return batch_x, batch_y

        # A single permutation keeps every sample paired with its label without
        # re-seeding (and thereby disturbing) NumPy's global random state.
        order = np.random.permutation(count)
        for batch in (batch_x, batch_y):
            if isinstance(batch, np.ndarray):
                batch[:] = batch[order]     # fancy indexing copies before assigning
            else:
                batch[:] = [batch[k] for k in order]
        return batch_x, batch_y

    def get_next_batch(self, cnt, batch_size, batch_x, batch_y):     # fetch the batch for step `cnt`
        """Return batch number `cnt` of size `batch_size`, cycling through the data.

        Indices wrap around the end of the data set as often as needed, so any
        non-negative `cnt` yields exactly `batch_size` samples.

        Returns:
            (img, lable_y): img is a float array (batch_size, 36, 36, 1) scaled
            to [-0.5, 0.5]; lable_y holds the matching rows of `batch_y`.
            Both are empty when `batch_x` is empty.
        """
        total = len(batch_x)
        if total == 0:
            return self._to_network_input([]), np.array(batch_y)

        picks = (cnt * batch_size + np.arange(batch_size)) % total
        img = self._to_network_input([batch_x[k] for k in picks])
        lable_y = np.array([batch_y[k] for k in picks])
        return img, lable_y

文件text.py 用来做实时识别

import cv2
import os
import tensorflow as tf
import numpy as np
def real_time():
    """Recognise the face seen by the default camera until "q" is pressed.

    Rebuilds the CNN, restores its weights from the saved checkpoint and then,
    for every camera frame that contains exactly one detected face, draws a
    rectangle around the face and writes the predicted member name next to it.
    Prints a hint and returns immediately when no checkpoint can be restored.
    The session, the camera and the preview window are always released.
    """
    Root_path = r"C:\\Users\\96904\\Desktop\\network"
    Save_path = Root_path + "/data_saved/save_net.ckpt"
    cascade_path = r"C:\\Users\\96904\\Desktop\\OpenCV\\face1-lvdaidai\\haarcascades\\haarcascade_frontalface_default.xml"

    def weight_variable(shape):
        initial = tf.truncated_normal(shape, stddev=0.1)
        return tf.Variable(initial)

    def bias_variable(shape):
        initial = tf.constant(0.1, shape=shape)
        return tf.Variable(initial)

    def conv2d(x, W):
        return tf.nn.conv2d(x, W, strides=[1, 1, 1, 1], padding='SAME')

    def max_pool_2x2(x):
        return tf.nn.max_pool(x, ksize=[1, 2, 2, 1],
                              strides=[1, 2, 2, 1], padding='SAME')

    # Label index -> member name.  The walk order defines the label index, so
    # it has to be the same enumeration that was used when the model was trained.
    names = {}
    i = 1
    for root, dirs, files in os.walk(Root_path + r"\\faces"):
        for name in dirs:
            names[i] = name
            i = i + 1

    # Start from an empty graph: the variables below are unnamed, so the names
    # TensorFlow generates for them (and looks up in the checkpoint) depend on
    # nothing else having been created in the default graph before.
    tf.reset_default_graph()

    xs = tf.placeholder(dtype="float", shape=[None, 36, 36, 1])
    keep_prob = tf.placeholder("float")

    # Same layer layout and creation order as the training script.
    W_conv1 = weight_variable([5, 5, 1, 32])
    b_conv1 = bias_variable([32])
    h_conv1 = tf.nn.relu(conv2d(xs, W_conv1) + b_conv1)
    h_pool1 = max_pool_2x2(h_conv1)

    W_conv2 = weight_variable([5, 5, 32, 64])
    b_conv2 = bias_variable([64])
    h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
    h_pool2 = max_pool_2x2(h_conv2)

    W_conv3 = weight_variable([5, 5, 64, 128])
    b_conv3 = bias_variable([128])
    h_conv3 = tf.nn.relu(conv2d(h_pool2, W_conv3) + b_conv3)
    h_pool3 = max_pool_2x2(h_conv3)

    W_fc1 = weight_variable([5 * 5 * 128, 1024])
    b_fc1 = bias_variable([1024])

    h_pool3_flat = tf.reshape(h_pool3, [-1, 5 * 5 * 128])
    h_fc1 = tf.nn.relu(tf.matmul(h_pool3_flat, W_fc1) + b_fc1)
    h_fc1_drop = tf.nn.dropout(h_fc1, keep_prob)

    W_fc2 = weight_variable([1024, len(names)])
    b_fc2 = bias_variable([len(names)])
    y_conv = tf.matmul(h_fc1_drop, W_fc2) + b_fc2

    saver = tf.train.Saver()
    face_label = tf.argmax(y_conv, 1)
    sess = tf.Session()
    cap = None
    try:
        try:
            saver.restore(sess, Save_path)
        except (ValueError, tf.errors.OpError):
            print("please train the module first.")
            return

        # Open the camera only once the model is known to be usable.
        face = cv2.CascadeClassifier(cascade_path)
        cap = cv2.VideoCapture(0)

        while True:
            ret, img = cap.read()
            if not ret:
                print("无法读取摄像头画面")
                break
            img_g = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            faces = face.detectMultiScale(img_g, scaleFactor=1.1, minNeighbors=5, minSize=(30, 30))

            if len(faces) == 1:
                x, y, w, h = faces[0]
                whose_face = img_g[y:y + h, x:x + w]
                cv2.rectangle(img, (x, y), (x + w, y + h), (0, 0, 255), 2)
                print(whose_face.shape)

                face_tem = cv2.resize(whose_face, (36, 36)).reshape(-1, 36, 36, 1)
                face_tem = face_tem * (1. / 255) - 0.5
                # Classify only the face found in THIS frame, so a label from an
                # earlier frame is never drawn at a stale position.
                face_labels = sess.run(face_label, feed_dict={xs: face_tem, keep_prob: 1.0})
                you_are = names.get(int(face_labels[0]) + 1)
                if you_are is not None:
                    cv2.putText(img, you_are, (x, y), cv2.FONT_HERSHEY_SIMPLEX, 1, (255, 255, 255))
            else:
                print("只允许一个人录入")

            cv2.imshow("test", img)
            # waitKey returns an integer key code, never a string; mask it to
            # the low byte before comparing with the code of "q".
            if cv2.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        if cap is not None:
            cap.release()
        cv2.destroyAllWindows()
        sess.close()
        tf.reset_default_graph()

目前还存在一些问题：样本数量较少，模型的泛化能力较差，接下来我会对读取到的图像样本做进一步处理，以提高模型的泛化能力。
另外还有一个问题：mode == 3 时如果没有成功读取到人脸图像，程序就会报错，这个问题也有待解决。