Python3+OpenCV+TensorFlow进行人脸识别(改进精简版)

最新推荐文章于 2023-05-09 18:01:49 发布

weixin_49522298

最新推荐文章于 2023-05-09 18:01:49 发布

阅读量354

点赞数

文章标签：人脸识别深度学习 tensorflow opencv 图像识别

本文链接：https://blog.csdn.net/weixin_49522298/article/details/112763038

版权

前言

这篇文章基于我自己上一篇文章Python3+OpenCV+TensorFlow进行人脸识别
对于获取数据的部分没有修改；
主要改进和精简了后续的读取数据，模型训练和应用模型进行预测部分
毕竟有TF和Keras提供的现成接口，干嘛要自己重复造简陋的轮子呢？
直接贴代码
讲一下修改点：

nb的数量就是分类的种类数量，例如区分两个人就是2，区分5个人就是5
将原本自己写的“扫路径-读取文件-重定义尺寸-切分为训练数据和验证数据”等方法，直接用Keras提供的preprocessing.image_dataset_from_directory方法完成，完整的接口说明参考Keras读取数据接口的英文文档。该方法直接读取目录，每个子目录按字母顺序对应一个类别index。需要注意的是label_mode，主要有三种模式int，categorical和binary：其中int的标签就是0,1,2这种整数，categorical的则是[1,0,0]，[0,1,0]这种one-hot向量，binary则是取值为0或1的标量（只适用于两个类别的情况）。seed是在shuffle为真时提供的随机种子，具体填什么值没啥区别；但是配合validation_split使用时，训练集和验证集的两次调用必须填同一个seed，否则两个子集可能会有重叠。
Make model这一块弃用了Sequential的方式，但是效果是一样的；而且模型的各层也都是和上一篇文章完全一样的；另外把数据强化也集成到了这里，只是做了旋转，注意这里不可以做拉伸，因为会影响到size导致不一致无法训练。
训练模型时，不再做数据强化，也不再使用“ImageDataGenerator”；其实“ImageDataGenerator”也可以直接从文件夹里读取数据，以流的方式提供训练数据，接口文档也在上一个链接里；另外使用Callback（ModelCheckpoint）来保存训练出来的模型。
最后读取数据进行预测这一块，直接用TensorFlow提供的接口好像只能读取图片，没深入研究能不能像OpenCV那样读取摄像头数据，所以做图片预测用的TensorFlow的接口；做视频预测还是用的OpenCV，另外因为这个脚本不再是做人脸识别的了，所以是直接全图识别的，要做人脸识别还是参考上一篇文章的，先用OpenCV的分类器抓取人脸再做预测。

import tensorflow as tf
import numpy as np
import cv2
from tensorflow import keras
from tensorflow.keras import layers
from keras.optimizers import SGD
import matplotlib.pyplot as plt
import time


# Size every image is resized to. It is passed as `image_size` to the dataset
# loader and, with a channel dimension of 3 appended, used as the model's
# input shape; the camera prediction path also reads it directly.
image_size = (64, 64)
# Number of images per batch produced by the dataset loader.
batch_size = 20
# Number of classes to distinguish; passed to make_model as `num_classes`.
nb = 3


def load_data(data_path, image_size, batch_size):
    """Create the training and validation datasets from an image directory.

    Both datasets are produced by
    ``keras.preprocessing.image_dataset_from_directory`` with identical
    options (10% validation split, one-hot ``categorical`` labels, shuffling
    with seed 123); only the requested ``subset`` differs.

    Args:
        data_path: Directory handed to the Keras loader.
        image_size: Size the loader resizes every image to.
        batch_size: Number of images per batch.

    Returns:
        A ``(train_ds, val_ds)`` tuple of datasets.
    """
    # Everything except `subset` is shared, so the two calls cannot drift
    # apart (in particular they keep the same seed and split fraction).
    shared_options = dict(
        validation_split=0.1,
        label_mode='categorical',
        shuffle=True,
        seed=123,
        image_size=image_size,
        batch_size=batch_size,
    )
    from_directory = keras.preprocessing.image_dataset_from_directory
    train_ds = from_directory(data_path, subset="training", **shared_options)
    val_ds = from_directory(data_path, subset="validation", **shared_options)
    return train_ds, val_ds


def view_data(dataset):
    """Display up to nine images from the first batch of ``dataset``.

    The images are laid out in a single 3x3 grid, each titled with the index
    of its largest label entry (the class index for one-hot labels).

    Args:
        dataset: Iterable dataset supporting ``take(1)`` that yields
            ``(images, labels)`` batches.
    """
    plt.figure(figsize=(10, 10))
    for images, labels in dataset.take(1):
        # A batch may hold fewer than nine images (small batch size or a
        # short final batch); never index past what is actually there.
        shown = min(9, int(images.shape[0]))
        for i in range(shown):
            plt.subplot(3, 3, i + 1)
            plt.imshow(images[i].numpy().astype("uint8"))
            plt.title(np.argmax(labels[i]))
            plt.axis("off")
    # Show once, after the grid is complete, instead of opening a separate
    # blocking window for every image.
    plt.show()


def make_model(input_shape, num_classes):
    """Build the (uncompiled) convolutional classifier.

    The network applies, in order: random horizontal flip and random rotation
    (factor 0.15), rescaling by 1/255, two 32-filter 3x3 convolutions, 2x2 max
    pooling, dropout 0.25, two 64-filter 3x3 convolutions, max pooling with
    pool size 3 and "same" padding, dropout 0.25, flatten, a 512-unit dense
    layer with a separate ReLU activation layer, dropout 0.5 and a softmax
    output layer.

    Args:
        input_shape: Shape of one input image, passed to ``keras.Input``.
        num_classes: Number of units in the softmax output layer.

    Returns:
        A ``keras.Model`` mapping the input to class probabilities.
    """
    preprocessing = layers.experimental.preprocessing

    # Augmentation lives inside the model, wrapped in its own Sequential.
    augment = keras.Sequential(
        [
            preprocessing.RandomFlip("horizontal"),
            preprocessing.RandomRotation(0.15),
        ]
    )
    inputs = keras.Input(shape=input_shape)

    # Layers are listed in the exact order they are applied.
    pipeline = [
        augment,
        preprocessing.Rescaling(1.0 / 255),
        layers.Conv2D(32, (3, 3), padding="same", activation='relu'),
        layers.Conv2D(32, (3, 3), activation='relu'),
        layers.MaxPooling2D((2, 2)),
        layers.Dropout(0.25),
        layers.Conv2D(64, (3, 3), padding="same", activation='relu'),
        layers.Conv2D(64, (3, 3), activation='relu'),
        layers.MaxPooling2D(3, padding="same"),
        layers.Dropout(0.25),
        layers.Flatten(),
        layers.Dense(512),
        layers.Activation('relu'),
        layers.Dropout(0.5),
        layers.Dense(num_classes, activation="softmax"),
    ]

    tensor = inputs
    for layer in pipeline:
        tensor = layer(tensor)
    return keras.Model(inputs, tensor)


def train_model(train_ds, val_ds, model_name, model=None):
    """Compile and train a model, checkpointing it to ``model_name``.

    The model is compiled with categorical cross-entropy, an SGD optimizer
    with Nesterov momentum and the accuracy metric, then fitted for 50
    epochs. A ``ModelCheckpoint`` callback writes the model to
    ``model_name`` during training.

    Args:
        train_ds: Training dataset passed to ``model.fit``.
        val_ds: Validation dataset passed as ``validation_data``.
        model_name: File path handed to ``keras.callbacks.ModelCheckpoint``.
        model: Model to train. When omitted, the module-level ``model``
            variable is used, which keeps existing three-argument calls
            working.

    Raises:
        NameError: If ``model`` is omitted and no module-level ``model``
            exists.
    """
    if model is None:
        # Backward compatibility: the original implementation silently read
        # a global named `model`; keep that working but fail with a clear
        # message when it is missing.
        try:
            model = globals()['model']
        except KeyError:
            raise NameError(
                "name 'model' is not defined; pass it via the `model` argument"
            ) from None

    epochs = 50
    callbacks = [
        keras.callbacks.ModelCheckpoint(model_name),
    ]
    # Use the optimizer from the same `tensorflow.keras` package the model is
    # built with, and the `learning_rate` keyword instead of the deprecated
    # `lr` alias.
    # NOTE(review): `decay` is kept from the original; recent Keras releases
    # may no longer accept it -- confirm against the installed version.
    sgd = keras.optimizers.SGD(
        learning_rate=0.01, decay=1e-6, momentum=0.9, nesterov=True
    )
    model.compile(loss='categorical_crossentropy', optimizer=sgd, metrics=['accuracy'])
    model.fit(
        train_ds, epochs=epochs, callbacks=callbacks, validation_data=val_ds,
    )


def do_prediction_tf(model_name, image, image_size):
    """Classify one image file with a saved model, using Keras image loading.

    Args:
        model_name: Path of the saved model to load.
        image: Path of the image file to classify.
        image_size: Target size the image is resized to while loading.

    Returns:
        A ``(class_index, score)`` tuple: the index of the highest-scoring
        class and that highest score. The same pair is also printed.
    """
    image_api = keras.preprocessing.image

    classifier = keras.models.load_model(model_name)
    pixels = image_api.img_to_array(
        image_api.load_img(image, target_size=image_size)
    )
    # The model predicts on batches, so add a leading batch axis of size 1.
    batch = tf.expand_dims(pixels, 0)

    scores = classifier.predict(batch)
    best_class = np.argmax(scores, axis=1)[0]
    best_score = np.max(scores)
    print('result: %d (%.2f)' % (best_class, best_score))
    return best_class, best_score


def do_prediction_cv2(model, image, image_size):
    """Classify one OpenCV frame with an already-loaded model.

    Args:
        model: Loaded Keras model exposing ``predict``.
        image: 3-channel frame in OpenCV's BGR channel order (for example a
            frame returned by ``cv2.VideoCapture.read``).
        image_size: ``(height, width)`` the model was trained on.

    Returns:
        A ``(class_index, score)`` tuple: the index of the highest-scoring
        class and that highest score. The same pair is also printed.
    """
    height, width = image_size
    # cv2.resize takes its target size as (width, height), the reverse of the
    # (height, width) convention used for `image_size`.
    resized = cv2.resize(image, (width, height))
    # The training images are loaded by Keras in RGB order, while OpenCV
    # delivers BGR; convert so the model sees the channels it was trained on.
    rgb = cv2.cvtColor(resized, cv2.COLOR_BGR2RGB)
    # Add the leading batch axis expected by `predict`.
    batch = np.expand_dims(rgb, axis=0)

    p_result = model.predict(batch)
    result = np.argmax(p_result, axis=1)
    per = np.max(p_result)
    print('result: %d(%.2f)' % (result[0], per))
    return result[0], per


def show_result(frame, result, x, y, color):
    """Draw ``result`` onto ``frame`` in the named colour.

    The text is drawn with ``cv2.putText`` at ``(x + 30, y + 30)`` using the
    Hershey Duplex font, scale 1 and thickness 2. If ``color`` is not one of
    ``'green'``, ``'yellow'`` or ``'red'``, a fixed error message is drawn in
    red instead of ``result``.

    Args:
        frame: Image to draw on (modified in place by OpenCV).
        result: Text to draw.
        x: Horizontal offset; 30 is added to it.
        y: Vertical offset; 30 is added to it.
        color: Colour name: ``'green'``, ``'yellow'`` or ``'red'``.
    """
    # Colour name -> BGR tuple as expected by OpenCV.
    palette = {
        'green': (0, 255, 0),
        'yellow': (0, 255, 255),
        'red': (0, 0, 255),
    }
    if color in palette:
        text, bgr = result, palette[color]
    else:
        text, bgr = 'Color must be green/yellow/red!', (0, 0, 255)

    cv2.putText(
        frame,
        text,
        (x + 30, y + 30),  # text origin
        cv2.FONT_HERSHEY_DUPLEX,  # font face
        1,  # font scale
        bgr,  # colour (BGR)
        2,  # stroke thickness
    )


def recognize_cam(model_path, cam_id, width, height):
    """Classify live camera frames, display them and record them to an MP4.

    Each frame is classified as a whole with ``do_prediction_cv2``, annotated
    with the predicted label and score, shown in a window named
    "Recognition" and appended to a video file named after the start
    timestamp (``yymmddHHMMSS.mp4``). The loop ends when ``q`` is pressed or
    when the camera stops delivering frames.

    Args:
        model_path: Path of the saved model to load.
        cam_id: Camera index passed to ``cv2.VideoCapture``.
        width: Requested capture width; also the recorded video width.
        height: Requested capture height; also the recorded video height.
    """
    model = keras.models.load_model(model_path)
    fps = 24
    wait = int(1000 / fps)  # per-frame key-poll delay in milliseconds
    size = (width, height)
    # Predicted class index -> (label text, colour name for show_result).
    labels = {0: ('A', 'green'), 1: ('B', 'yellow'), 2: ('C', 'red')}
    timestamp = time.strftime('%y%m%d%H%M%S', time.localtime())
    video = timestamp + '.mp4'

    video_writer = cv2.VideoWriter(video, cv2.VideoWriter_fourcc('m', 'p', '4', 'v'), fps, size)
    # NOTE(review): CAP_DSHOW selects the DirectShow backend, which is
    # Windows-specific -- confirm this is intended on other platforms.
    cap = cv2.VideoCapture(cam_id, cv2.CAP_DSHOW)
    try:
        cap.set(cv2.CAP_PROP_FRAME_WIDTH, width)
        cap.set(cv2.CAP_PROP_FRAME_HEIGHT, height)
        while True:
            ret, frame = cap.read()
            if not ret:
                # No frame (camera missing or disconnected): stop instead of
                # passing None to imshow / the video writer.
                print('Failed to read a frame from camera %s; stopping.' % cam_id)
                break
            class_id, per = do_prediction_cv2(model, frame, image_size)
            per = round(per, 2)
            if int(class_id) in labels:
                name, color = labels[int(class_id)]
                show_result(frame, name + '(' + str(per) + ')', 0, 0, color)
            cv2.imshow("Recognition", frame)
            k = cv2.waitKey(wait)  # wait for a key press for `wait` ms
            if k & 0xFF == ord('q'):  # quit on 'q'
                break
            # The camera may not honour the requested resolution, while the
            # writer was opened for `size`; resize so the frame matches.
            if (frame.shape[1], frame.shape[0]) != size:
                frame = cv2.resize(frame, size)
            video_writer.write(frame)
    finally:
        # Always release the camera AND the writer (otherwise the video file
        # is never finalised), even if prediction or display raised.
        cap.release()
        video_writer.release()
        cv2.destroyAllWindows()


if __name__ == '__main__':
    # Script entry point: ask which action to run, then run it exactly once.
    data_path = 'data'
    model_name = './model/classify_210117.h5'
    image = './data/C/201112011300_36.jpg'

    prompt = ('To train model, input 1;\n'
              'To predict image, input 2;\n'
              'To predict cam, input 3:')

    # Keep asking until one of the three supported choices is entered.
    choice = input(prompt)
    while choice not in ('1', '2', '3'):
        print('Please input follow guide.')
        choice = input(prompt)

    if choice == '1':
        train_ds, valid_ds = load_data(data_path, image_size, batch_size)
        train_ds = train_ds.prefetch(buffer_size=32)
        valid_ds = valid_ds.prefetch(buffer_size=32)
        # `model` must stay a module-level name: train_model reads it as a
        # global rather than receiving it as an argument.
        model = make_model(input_shape=image_size + (3,), num_classes=nb)
        train_model(train_ds, valid_ds, model_name)
    elif choice == '2':
        do_prediction_tf(model_name, image, image_size)
    else:
        recognize_cam(model_name, 0, 800, 600)

weixin_49522298

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
2
评论
Python3+OpenCV+TensorFlow进行人脸识别(改进精简版)

前言这篇文章基于我自己上一篇文章Python3+OpenCV+TensorFlow进行人脸识别对于获取数据的部分没有修改；主要改进和精简了后续的读取数据，模型训练和应用模型进行预测部分毕竟有TF和Keras提供的现成接口，干嘛要自己重复造简陋的轮子呢？直接贴代码讲一下修改点：将原本自己写的“扫路径-读取文件-重定义尺寸”等方法，直接用Keras提供的preprocessing.image_dataset_from_directory方法完成功能，完整的接口说明参考Keras 读取数据接口英语
复制链接

扫一扫