基于卷积神经网络（cnn）的人脸识别（代码+注释）

最新推荐文章于 2024-07-25 09:48:00 发布

我假装看天空

最新推荐文章于 2024-07-25 09:48:00 发布

阅读量1.7w

点赞数 34

文章标签：大数据 python

原文链接：https://www.cnblogs.com/AIBigTruth/p/10602995.html

版权

卷积神经网络的核心是卷积，在cnn的卷积层中存在一个个填充着数字的正方形小格子，它们被称为卷积核。原始图片经过输入层后会变为灰度或是RGB数值填充的矩阵。将卷积核与图片矩阵对齐，对应格子中的数字相乘后再相加，再将得到的数字填入新矩阵，这就是卷积。卷积核以一定的距离在图像上移动运算，这个距离被称为步长，得到的新矩阵能反映图像的部分特征，因此被称为特征图。它们既是这一层的输出，又是下一层的输入。设定不同的卷积核，我们就能找到各种各样的特征。训练就是根据已有的数据和标签，自动确定卷积核中的数字。池化层能选取图像的主要特征，矩阵被池化后，参数能大量减少。全连接层通常在网络的最后，能将提取到的特征集合在一起，给出图片是某种事物的概率。

此项目共分四步：

一、采集我的人脸数据集

二、采集其他人脸数据集

三、CNN模型训练

四、人脸识别

一、采集我的人脸数据集

"""-----------------------------------------
一、采集我的人脸数据集
获取本人的人脸数据集（原计划10000张，下方代码的采集上限为300张），使用的是dlib来
识别人脸，虽然速度比OpenCV识别慢，但是识别效
果更好。
人脸大小：64*64
-----------------------------------------"""
# 开源的视觉库，实现图像处理和计算机视觉方面的很多通用算法
import cv2
#  机器学习的开源库，（直接包含头文件即可）
import dlib
#  提供一些方便使用操作系统相关功能的函数
import os
#  生成随机数
import random

# Directory that receives the captured face crops.
faces_my_path = './faces_my'
# Edge length, in pixels, of the square face images that get saved (64 x 64).
size = 64
# exist_ok=True creates the directory when it is missing and avoids the
# check-then-create race of a separate os.path.exists() test.
os.makedirs(faces_my_path, exist_ok=True)

"""改变图片的相关参数：亮度与对比度
处理图像经常会对图像色彩进行增强,这就是改变图像的亮度和对比度"""

#  声明函数 light表示对比度 bias表示亮度偏置
def img_change(img, light=1, bias=0):
    """Adjust the contrast and brightness of an image in place.

    Every value of the first three channels is mapped to
    ``value * light + bias``, truncated toward zero and clamped to
    ``[0, 255]``.  Any further channel (for example alpha) is left untouched.

    Args:
        img: Array indexed as ``[row, column, channel]``; it is modified in
            place.
        light: Contrast gain multiplied into every channel value.
        bias: Brightness offset added after the gain.

    Returns:
        The same ``img`` object, after modification.
    """
    # Local import: this script's import block does not bring in numpy.
    import numpy as np

    channels = img[:, :, :3]
    # Compute in float64 so the intermediate result cannot wrap around the
    # image's integer dtype before it is clamped.
    adjusted = np.trunc(channels.astype(np.float64) * light + bias)
    # Slice assignment writes through the view into the caller's array.
    channels[...] = np.clip(adjusted, 0, 255).astype(img.dtype)
    return img


"""特征提取器:dlib自带的frontal_face_detector，作为我们的特征提取器"""
# Face detector: dlib's built-in frontal face detector.
detector = dlib.get_frontal_face_detector()
# An argument of 0 opens the first camera; a video file path opens that video.
cap = cv2.VideoCapture(0)

num = 1
try:
    # Keep capturing until 300 face images have been written.
    while num <= 300:
        print('已采集 %s' % num)

        success, img = cap.read()
        if not success or img is None:
            # No frame was delivered; stop instead of crashing in cvtColor.
            print('Failed to read a frame from the camera.')
            break
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)

        # Second argument: how many times the image is upsampled before
        # detection.
        dets = detector(gray_img, 1)

        for d in dets:
            # Clamp the detection rectangle to non-negative pixel coordinates.
            top = max(d.top(), 0)
            bottom = max(d.bottom(), 0)
            left = max(d.left(), 0)
            right = max(d.right(), 0)

            face = img[top:bottom, left:right]
            if face.size == 0:
                # A degenerate rectangle gives an empty crop, which
                # cv2.resize rejects.
                continue
            # Random contrast and brightness add variety to the samples.
            face = img_change(face, random.uniform(0.5, 1.5), random.randint(-50, 50))
            face = cv2.resize(face, (size, size))
            cv2.imshow('image', face)
            cv2.imwrite(faces_my_path + '/' + str(num) + '.jpg', face)
            num = num + 1

        # Wait 30 ms for a key press; Esc (key code 27) aborts the capture.
        if cv2.waitKey(30) == 27:
            break
    else:
        # Reached only when the loop ends without break, i.e. the quota is met.
        print('Finished!')
finally:
    # Release the camera and close the preview window on every exit path.
    cap.release()
    cv2.destroyAllWindows()

二、采集其他人脸数据集

"""---------------------------------------------------------------
二、采集其他人脸数据集
本系统使用人脸数据集下载:http://vis-www.cs.umass.edu/lfw/lfw.tgz
先将下载的图片集放在img_source目录下，用dlib来批量识别图片中的人脸部分，
并保存到指定目录faces_other
人脸大小：64*64
----------------------------------------------------------------"""
# -*- coding: utf-8 -*-
# sys模块是最常用的和python解释器交互的模块,sys模块可供访问由解释器(interpreter)
# 使用或维护的变量和与解释器进行交互的函数。sys 模块提供了许多函数和变量来处理 Python 运行时环境的不同部分
import sys
import cv2
import os
# dlib是一个机器学习的开源库，包含了机器学习的很多算法，使用起来很方便。也提供了一些python接口。
import dlib

# Folder holding the downloaded source pictures and folder for the face crops.
source_path = './img_source'
faces_other_path = './faces_other'

# Edge length, in pixels, of the square face images that get saved.
size = 64
os.makedirs(faces_other_path, exist_ok=True)

# Face detector: dlib's built-in frontal face detector.
detector = dlib.get_frontal_face_detector()

num = 1
# os.walk yields (directory, sub-directory names, file names) for source_path
# and every directory below it.
for (path, dirnames, filenames) in os.walk(source_path):
    for filename in filenames:
        if not filename.endswith('.jpg'):
            continue
        print('Being processed picture %s' % num)
        img_path = path + '/' + filename
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread returns None for a file it cannot decode; skip it
            # instead of crashing in cvtColor.
            continue
        gray_img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
        # Second argument: how many times the image is upsampled before
        # detection.
        dets = detector(gray_img, 1)

        for d in dets:
            # Clamp the detection rectangle to non-negative pixel coordinates.
            top = max(d.top(), 0)
            bottom = max(d.bottom(), 0)
            left = max(d.left(), 0)
            right = max(d.right(), 0)

            face = img[top:bottom, left:right]
            if face.size == 0:
                # An empty crop cannot be resized.
                continue
            face = cv2.resize(face, (size, size))
            cv2.imshow('image', face)
            cv2.imwrite(faces_other_path + '/' + str(num) + '.jpg', face)
            num += 1

        # Refresh the preview for 30 ms; Esc (key code 27) stops the script.
        key = cv2.waitKey(30)
        if key == 27:
            sys.exit(0)

# Close the preview window once every picture has been processed.
cv2.destroyAllWindows()

三、CNN模型训练

"""--------------------------------------------------------------
三、CNN模型训练
训练模型：共八层神经网络，卷积层特征提取，池化层降维,全连接层进行分类。
训练数据：22784，测试数据：1200，训练集：测试集≈19:1（test_size=0.05）
共两类：我的人脸（yes),不是我的人脸（no）。
共八层： 第一、二层（卷积层1、池化层1），输入图片64*64*3，输出图片32*32*32
        第三、四层（卷积层2、池化层2），输入图片32*32*32，输出图片16*16*64
        第五、六层（卷积层3、池化层3），输入图片16*16*64，输出图片8*8*64
        第七层（全连接层），输入图片8*8*64，reshape到1*4096，输出1*512
        第八层（输出层），输入1*512，输出1*2
学习率：0.01
损失函数：交叉熵
优化器：Adam  (2014年12月， Kingma和Lei Ba两位学者提出了Adam优化器，结合AdaGrad和RMSProp两种优化算法的优点。
对梯度的一阶矩估计（First Moment Estimation，即梯度的均值）和二阶矩估计（Second Moment Estimation，即梯度的未中心化的方差）
进行综合考虑，计算出更新步长。)
------------------------------------------------------------------"""
# train_test_split()是sklearn.model_selection中的分离器函数，用于将数组或矩阵划分为训练集和测试集，函数样式为：
# X_train, X_test, y_train, y_test = train_test_split(train_data, train_target, test_size, random_state，shuffle)
from sklearn.model_selection import train_test_split
import tensorflow.compat.v1 as tf      # TensorFlow是一个基于数据流编程（dataflow programming）的符号数学系统，
import numpy as np      # 数值计算库
import random
import cv2
import sys
import os


""" 定义参数 """
# Folders holding the two classes of face images.
faces_my_path, faces_other_path = './faces_my', './faces_other'
batch_size = 100      # images per mini-batch
learning_rate = 0.01  # step size handed to the optimizer
size = 64             # images are size x size pixels with 3 channels
# Face images and, in the same order, their labels.
imgs, labs = [], []

"""定义读取人脸数据函数，根据不同的人名，分配不同的onehot值（独热编码，只有1,0）"""
# 当前目录（path），当前目录的子目录（dirnames），当前目录的文件（filenames）
def readData(path, h=size, w=size):
    """Load every ``.jpg`` directly inside *path* into the global sample lists.

    Each image is padded with black borders to a square, resized to
    ``w`` x ``h`` and appended to ``imgs``; ``path`` itself is appended to
    ``labs`` as the image's label.  Files that cannot be decoded are skipped.

    Args:
        path: Directory to scan (sub-directories are not visited).
        h: Target image height in pixels.
        w: Target image width in pixels.
    """
    for filename in os.listdir(path):
        if not filename.endswith('.jpg'):
            continue
        img = cv2.imread(path + '/' + filename)
        if img is None:
            # cv2.imread returns None for a file it cannot decode; without
            # this guard getPaddingSize would fail on ``img.shape``.
            continue
        top, bottom, left, right = getPaddingSize(img)
        # Constant black border turns the image into a square.
        img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])
        # cv2.resize takes the target size as (width, height).
        img = cv2.resize(img, (w, h))
        imgs.append(img)
        labs.append(path)


"""定义尺寸变换函数，有一套公式"""
# 获得图片的宽和高还有深度
# padding 填充像素
def getPaddingSize(img):
    """Return the border widths that pad *img* to a square.

    The shorter side is extended to the length of the longer one, with the
    padding split between both edges; the extra pixel of an odd difference
    goes to the bottom or right edge.

    Args:
        img: Array whose ``shape`` is ``(height, width, channels)``.

    Returns:
        Tuple ``(top, bottom, left, right)`` of border widths in pixels.
    """
    rows, cols, _ = img.shape
    side = max(rows, cols)

    # At most one of the two gaps is non-zero because ``side`` equals the
    # larger dimension.
    gap_w = side - cols
    gap_h = side - rows

    left = gap_w // 2
    top = gap_h // 2
    return top, gap_h - top, left, gap_w - left


"""定义神经网络层，共五层，卷积层特征提取，池化层降维,全连接层进行分类，共两类：我的人脸（true),不是我的人脸（false）"""
# 进行卷积操作，相乘求和，滑动窗口的遍历
# 卷积网络在计算每一层的网络个数的时候要细心一些，不然容易出错
# 要注意下一层的输入是上一层的输出
# Tensorflow中的CNN layer参数（即用tensorflow框架实现简单CNN）
def cnnLayer():
    """Build the CNN graph on the global placeholder ``x`` and return its logits.

    Three blocks of 3x3 convolution (stride 1, SAME padding) + ReLU, 2x2
    max-pooling (stride 2, SAME padding) and dropout are followed by a
    512-unit fully connected ReLU layer with dropout and a 2-unit linear
    output layer.  Dropout reads the global placeholders ``keep_prob_fifty``
    (convolution blocks) and ``keep_prob_seventy_five`` (fully connected
    layer).

    Returns:
        The output tensor of the last layer: two values per input image.
    """
    unit_stride = [1, 1, 1, 1]
    # ksize/strides are [batch, height, width, channels]; only the spatial
    # axes are pooled.
    pool_window = [1, 2, 2, 1]

    features = x
    # (input channels, output channels) of each convolution block.  With a
    # 64*64*3 input the pooled outputs are 32*32*32, 16*16*64 and 8*8*64.
    # Variables are created as weight then bias, layer by layer.
    for in_ch, out_ch in ((3, 32), (32, 64), (64, 64)):
        kernel = tf.Variable(tf.random_normal([3, 3, in_ch, out_ch]))
        offset = tf.Variable(tf.random_normal([out_ch]))
        conv = tf.nn.relu(tf.nn.conv2d(features, kernel, strides=unit_stride, padding='SAME') + offset)
        pooled = tf.nn.max_pool(conv, ksize=pool_window, strides=pool_window, padding='SAME')
        features = tf.nn.dropout(pooled, keep_prob_fifty)

    # Fully connected layer: flatten 8*8*64 = 4096 features down to 512.
    flat_size = 8 * 8 * 64
    fc_weights = tf.Variable(tf.random_normal([flat_size, 512]))
    fc_bias = tf.Variable(tf.random_normal([512]))
    flat = tf.reshape(features, [-1, flat_size])  # -1: the batch dimension is inferred
    hidden = tf.nn.relu(tf.matmul(flat, fc_weights) + fc_bias)
    hidden = tf.nn.dropout(hidden, keep_prob_seventy_five)

    # Output layer: 512 -> 2 values, compared against the one-hot labels.
    out_weights = tf.Variable(tf.random_normal([512, 2]))
    out_bias = tf.Variable(tf.random_normal([2]))
    return tf.add(tf.matmul(hidden, out_weights), out_bias)


"""定义训练函数"""


def train():
    """Build the loss and optimizer, train for 10 epochs and save a checkpoint.

    Reads the module-level arrays ``train_x``/``train_y``/``test_x``/``test_y``
    and the placeholders ``x``, ``y_``, ``keep_prob_fifty`` and
    ``keep_prob_seventy_five``.  Loss and accuracy summaries are written to
    ``./tmp``.  Every 100 steps the accuracy on the test set is evaluated;
    once it exceeds 0.98 from the fourth epoch on (``n > 2``) the model is
    saved and the process exits.  If that never happens, the final weights
    are saved after the last epoch so that a checkpoint always exists.
    """
    out = cnnLayer()
    # Mean softmax cross-entropy between the logits and the one-hot labels.
    cross_entropy = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=out, labels=y_))
    optimizer = tf.train.AdamOptimizer(learning_rate).minimize(cross_entropy)
    # Fraction of samples whose arg-max prediction equals the arg-max label.
    accuracy = tf.reduce_mean(tf.cast(tf.equal(tf.argmax(out, 1), tf.argmax(y_, 1)), tf.float32))
    # Export loss and accuracy for TensorBoard.
    tf.summary.scalar('loss', cross_entropy)
    tf.summary.scalar('accuracy', accuracy)
    merged_summary_op = tf.summary.merge_all()
    saver = tf.train.Saver()
    with tf.Session() as sess:  # the session is released when the block exits
        sess.run(tf.global_variables_initializer())
        summary_writer = tf.summary.FileWriter('./tmp', graph=tf.get_default_graph())
        try:
            num_batch = len(train_x) // batch_size  # full batches per epoch
            for n in range(10):
                for i in range(num_batch):
                    step = n * num_batch + i
                    batch_x = train_x[i * batch_size: (i + 1) * batch_size]
                    batch_y = train_y[i * batch_size: (i + 1) * batch_size]

                    # One optimisation step; also fetch the loss and summaries.
                    _, loss, summary = sess.run([optimizer, cross_entropy, merged_summary_op],
                                                feed_dict={x: batch_x, y_: batch_y,
                                                           keep_prob_fifty: 0.5, keep_prob_seventy_five: 0.75})
                    summary_writer.add_summary(summary, step)
                    print("step:%d,  loss:%g" % (step, loss))

                    if step % 100 == 0:
                        # Dropout is disabled (keep probability 1.0) for evaluation.
                        acc = accuracy.eval({x: test_x, y_: test_y, keep_prob_fifty: 1.0, keep_prob_seventy_five: 1.0})
                        print("step:%d,  acc:%g" % (step, acc))
                        if acc > 0.98 and n > 2:
                            saver.save(sess, './train_faces.model', global_step=step)
                            sys.exit(0)

            # The accuracy target was never met: still save the final weights,
            # otherwise there is no checkpoint for later restoring.
            saver.save(sess, './train_faces.model', global_step=10 * num_batch)
        finally:
            # Flush and close the event file on every exit path, including
            # the SystemExit raised by sys.exit above.
            summary_writer.close()


if __name__ == '__main__':
    # 1. Load both face folders into the global imgs / labs lists.
    for folder in (faces_my_path, faces_other_path):
        readData(folder)
    imgs = np.array(imgs)
    # One-hot labels: [0, 1] marks my face, [1, 0] marks any other face.
    labs = np.array([[int(lab != faces_my_path), int(lab == faces_my_path)] for lab in labs])

    # 2. Hold out 5% of the samples as test set, using a random seed in [0, 100].
    seed = random.randint(0, 100)
    train_x_1, test_x_1, train_y, test_y = train_test_split(imgs, labs, test_size=0.05, random_state=seed)

    # 3. Reshape to (count, height, width, channels) and scale the values by
    #    1/255 as float32.
    train_x = train_x_1.reshape(train_x_1.shape[0], size, size, 3).astype('float32') / 255.0
    test_x = test_x_1.reshape(test_x_1.shape[0], size, size, 3).astype('float32') / 255.0
    print('Train Size:%s, Test Size:%s' % (len(train_x), len(test_x)))

    num_batch = len(train_x) // batch_size

    # Disable TF2 behaviour before creating the placeholders.
    tf.compat.v1.disable_v2_behavior()
    # Placeholders are fed through feed_dict when the session runs.
    x = tf.placeholder(tf.float32, [None, size, size, 3])  # input images
    y_ = tf.placeholder(tf.float32, [None, 2])             # one-hot labels
    keep_prob_fifty = tf.placeholder(tf.float32)           # fed with 0.5 while training
    keep_prob_seventy_five = tf.placeholder(tf.float32)    # fed with 0.75 while training

    # 4. Run the training.
    train()

四、人脸识别

"""---------------------------------------------------------
四、人脸识别
1、打开摄像头，获取图片并灰度化
2、人脸检测
3、加载卷积神经网络模型
4、人脸识别
------------------------------------------------------------"""
# TensorFlow™是一个基于数据流编程（dataflow programming）的符号数学系统，
# 被广泛应用于各类机器学习（machine learning）算法的编程实现，其前身是谷歌的神经网络算法库DistBelief
import tensorflow.compat.v1 as tf
import cv2
import dlib  #
import numpy as np
import os
import random
import sys
from sklearn.model_selection import train_test_split  #

"""定义参数"""
# Folders holding the two classes of face images.
faces_my_path, faces_other_path = './faces_my', './faces_other'
batch_size = 128      # images per mini-batch
learning_rate = 0.01  # learning rate
size = 64             # images are size x size pixels with 3 channels
# Face images and, in the same order, their labels.
imgs, labs = [], []

# Disable TF2 behaviour before creating the placeholders.
tf.compat.v1.disable_v2_behavior()
x = tf.placeholder(tf.float32, [None, size, size, 3])  # input images
y_ = tf.placeholder(tf.float32, [None, 2])             # one-hot labels
keep_prob_fifty = tf.placeholder(tf.float32)           # dropout keep probability (conv blocks)
keep_prob_seventy_five = tf.placeholder(tf.float32)    # dropout keep probability (dense layer)

"""定义读取人脸数据函数"""


def readData(path, h=size, w=size):
    """Load every ``.jpg`` directly inside *path* into the global sample lists.

    Each image is padded with black borders to a square, resized to
    ``w`` x ``h`` and appended to ``imgs``; ``path`` itself is appended to
    ``labs`` as the image's label.  Files that cannot be decoded are skipped.

    Args:
        path: Directory to scan (sub-directories are not visited).
        h: Target image height in pixels.
        w: Target image width in pixels.
    """
    for filename in os.listdir(path):
        if not filename.endswith('.jpg'):
            continue
        img = cv2.imread(path + '/' + filename)
        if img is None:
            # cv2.imread returns None for a file it cannot decode; without
            # this guard getPaddingSize would fail on ``img.shape``.
            continue
        top, bottom, left, right = getPaddingSize(img)
        # Constant black border turns the image into a square.
        img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=[0, 0, 0])
        # cv2.resize takes the target size as (width, height); passing
        # (h, w) only worked because both defaults are equal.
        img = cv2.resize(img, (w, h))
        imgs.append(img)
        labs.append(path)


"""定义padding大小函数，有一套公式。"""
# 这里是得到图片的大小，进行统一处理，将图片改成统一大小
def getPaddingSize(img):
    """Compute the borders needed to make *img* square.

    Args:
        img: Array whose ``shape`` is ``(height, width, channels)``.

    Returns:
        Tuple ``(top, bottom, left, right)`` of border widths in pixels.  Only
        the shorter dimension receives padding; when the difference is odd
        the second edge (bottom or right) gets the extra pixel.
    """
    height, width, _ = img.shape
    target = height if height > width else width

    def split(gap):
        # Divide a gap between two opposite edges, remainder to the second.
        first = gap // 2
        return first, gap - first

    top, bottom = split(target - height)
    left, right = split(target - width)
    return top, bottom, left, right


"""定义神经网络层，共五层，卷积层特征提取，池化层降维,全连接层进行分类，共两类：我的人脸（true),不是我的人脸（false）"""


# 下面开始进行卷积层的处理
# 卷积层加的padding为same是不会改变卷积层的大小的
def cnnLayer():
    """Build the CNN graph on the global placeholder ``x`` and return its logits.

    The network has three convolution blocks (3x3 convolution with SAME
    padding, ReLU, 2x2 max-pooling, dropout), a 512-unit fully connected
    ReLU layer with dropout and a 2-unit linear output layer.  Dropout reads
    the global placeholders ``keep_prob_fifty`` and
    ``keep_prob_seventy_five``.

    Returns:
        The output tensor of the last layer: two values per input image.
    """

    def conv_block(inputs, in_ch, out_ch):
        # Weight first, then bias: the variable creation order is kept
        # layer by layer.
        weights = tf.Variable(tf.random_normal([3, 3, in_ch, out_ch]))
        bias = tf.Variable(tf.random_normal([out_ch]))
        # SAME padding with stride 1 keeps the spatial size unchanged.
        activated = tf.nn.relu(tf.nn.conv2d(inputs, weights, strides=[1, 1, 1, 1], padding='SAME') + bias)
        # 2x2 pooling with stride 2 halves height and width.
        pooled = tf.nn.max_pool(activated, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME')
        return tf.nn.dropout(pooled, keep_prob_fifty)

    # 64*64*3 -> 32*32*32 -> 16*16*64 -> 8*8*64
    block1 = conv_block(x, 3, 32)
    block2 = conv_block(block1, 32, 64)
    block3 = conv_block(block2, 64, 64)

    # Fully connected layer: 8*8*64 = 4096 inputs, 512 outputs.
    dense_w = tf.Variable(tf.random_normal([8 * 8 * 64, 512]))
    dense_b = tf.Variable(tf.random_normal([512]))
    flattened = tf.reshape(block3, [-1, 8 * 8 * 64])  # -1: the batch dimension is inferred
    dense = tf.nn.dropout(tf.nn.relu(tf.matmul(flattened, dense_w) + dense_b), keep_prob_seventy_five)

    # Output layer: 512 inputs, 2 outputs.
    logits_w = tf.Variable(tf.random_normal([512, 2]))
    logits_b = tf.Variable(tf.random_normal([2]))
    return tf.add(tf.matmul(dense, logits_w), logits_b)


"""定义人脸识别函数"""


def face_recognise(image):
    """Classify one face image with the global session and ``predict`` op.

    Args:
        image: Face image; it is divided by 255.0 and fed as a batch of one.

    Returns:
        ``"Yes,my face"`` when the predicted index is 1, otherwise
        ``"No,other face"``.
    """
    # A keep probability of 1.0 switches dropout off for inference.
    feed = {x: [image / 255.0], keep_prob_fifty: 1.0, keep_prob_seventy_five: 1.0}
    label = sess.run(predict, feed_dict=feed)[0]
    return "Yes,my face" if label == 1 else "No,other face"


if __name__ == '__main__':
    # 1. Load the face data and split it as in the training script.
    # NOTE(review): nothing below uses these arrays except the size print;
    # this looks like a leftover from training -- confirm before removing.
    readData(faces_my_path)
    readData(faces_other_path)
    imgs = np.array(imgs)
    # One-hot labels: [0, 1] marks my face, [1, 0] marks any other face.
    labs = np.array([[0, 1] if lab == faces_my_path else [1, 0] for lab in labs])
    # 2. Hold out 5% of the samples, using a random seed in [0, 100].
    train_x_1, test_x_1, train_y, test_y = train_test_split(imgs, labs, test_size=0.05,
                                                            random_state=random.randint(0, 100))
    train_x_2 = train_x_1.reshape(train_x_1.shape[0], size, size, 3)
    test_x_2 = test_x_1.reshape(test_x_1.shape[0], size, size, 3)
    train_x = train_x_2.astype('float32') / 255.0
    test_x = test_x_2.astype('float32') / 255.0
    print('Train Size:%s, Test Size:%s' % (len(train_x), len(test_x)))
    num_batch = len(train_x) // batch_size

    # 3. Build the network; ``out`` holds two values per image.
    out = cnnLayer()
    # 4. Index (0 or 1) of the larger of the two outputs for every image.
    predict = tf.argmax(out, 1)

    # Restore the variables from the newest checkpoint in the current folder.
    saver = tf.train.Saver()
    checkpoint = tf.train.latest_checkpoint('.')
    if checkpoint is None:
        # latest_checkpoint returns None when no checkpoint exists; fail with
        # a clear message instead of an obscure restore error.
        sys.exit('No checkpoint found in the current directory; run the training script first.')
    sess = tf.Session()
    saver.restore(sess, checkpoint)

    # 5. Detect faces with dlib's frontal face detector on camera frames.
    detector = dlib.get_frontal_face_detector()
    cap = cv2.VideoCapture(0)
    try:
        while True:
            ok, img = cap.read()
            if not ok or img is None:
                # No frame was delivered; stop instead of crashing in cvtColor.
                print('Failed to read a frame from the camera.')
                break
            gray_image = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
            dets = detector(gray_image, 1)

            for d in dets:
                # Clamp the detection rectangle to non-negative coordinates.
                top = max(d.top(), 0)
                bottom = max(d.bottom(), 0)
                left = max(d.left(), 0)
                right = max(d.right(), 0)

                face = img[top:bottom, left:right]
                if face.size == 0:
                    # An empty crop cannot be resized.
                    continue
                face = cv2.resize(face, (size, size))

                # 6. Run the network once per face and reuse the result for
                # both the log line and the overlay.
                result = face_recognise(face)
                print('It recognizes my face? %s' % result)

                # Points are (x, y): box from (left, top) to (right, bottom),
                # drawn with colour (255, 0, 0) and thickness 3.
                cv2.rectangle(img, (left, top), (right, bottom), (255, 0, 0), 3)
                # Arguments: image, text, text origin (x, y), font, font
                # scale, colour (0, 255, 0), thickness.
                cv2.putText(img, result, (left, bottom), cv2.FONT_HERSHEY_COMPLEX, 1, (0, 255, 0), 2)

            cv2.imshow('image', img)
            # Wait 30 ms for a key press; Esc (key code 27) quits.
            if cv2.waitKey(30) == 27:
                break
    finally:
        # Release camera, window and session on every exit path.
        cap.release()
        cv2.destroyAllWindows()
        sess.close()

### 使用Anaconda导库

### 图片的构成方式（颜色协议，哔哩哔哩可看博主讲解）

### 人脸数据集下载:http://vis-www.cs.umass.edu/lfw/lfw.tgz

我假装看天空

关注

34
点赞
踩
382

收藏

觉得还不错? 一键收藏
26
评论
基于卷积神经网络（cnn）的人脸识别（代码+注释）

卷积神经网络（cnn）+代码+注释
复制链接

扫一扫