制作人脸数据集

最新推荐文章于 2023-09-29 20:18:11 发布

新华小霸王

最新推荐文章于 2023-09-29 20:18:11 发布

阅读量702

点赞数

分类专栏：深度学习数据集 json 文章标签：人脸数据集

本文链接：https://blog.csdn.net/weixin_44297729/article/details/103354422

版权

深度学习同时被 3 个专栏收录

6 篇文章 0 订阅

订阅专栏

数据集

1 篇文章 0 订阅

订阅专栏

json

1 篇文章 0 订阅

订阅专栏

制作人脸数据集

生成json格式数据集
读取json格式数据集

生成json格式数据集

在前面进行了人脸数据的采集后，我们需要将收集到的大量人脸数据进行汇总以及贴上标签。这里我会对收集到的灰度图像进行简单的处理，然后将图片数据以json格式保存到json文件中，方便后续对数据集的调用。
create_data.py

import os
import cv2
import json
import numpy as np
from keras.utils import np_utils
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split


# 改变图像大小
def resize(image, size=(64, 64)):
    """Return ``image`` rescaled to ``size`` using ``cv2.resize``.

    Args:
        image: image array accepted by ``cv2.resize``.
        size: target size forwarded to ``cv2.resize``; defaults to (64, 64).

    Returns:
        The resized image produced by ``cv2.resize``.
    """
    return cv2.resize(image, size)


# 图像边界填充
def getpaddingSize(image, padding=50):
    """Return ``image`` with a replicated border added on every side.

    Despite its name, this function returns the padded image, not a size.

    Args:
        image: image array accepted by ``cv2.copyMakeBorder``.
        padding: border width in pixels added to the top, bottom, left and
            right. Defaults to 50, the value that was previously hard-coded.

    Returns:
        The padded image; border pixels repeat the nearest edge pixel
        (``cv2.BORDER_REPLICATE``).
    """
    return cv2.copyMakeBorder(
        image, padding, padding, padding, padding, cv2.BORDER_REPLICATE
    )


# 标签转化成one-hot形式
def label_to_onehot(label_encoder):
    """Convert integer class indices to a one-hot float32 matrix.

    Args:
        label_encoder: array-like of non-negative integer class indices
            (in this file, the output of ``LabelEncoder.fit_transform``).

    Returns:
        A float32 array whose last axis has ``max(index) + 1`` entries, with
        a 1.0 at each sample's class index and 0.0 elsewhere. A trailing
        input axis of length 1 is dropped first, so inputs of shape ``(n,)``
        and ``(n, 1)`` both give ``(n, num_classes)``. Empty input gives an
        empty ``(0, 0)`` array instead of raising.
    """
    labels = np.asarray(label_encoder, dtype=int)
    out_shape = labels.shape
    # A column vector (n, 1) is treated like a flat vector (n,).
    if len(out_shape) > 1 and out_shape[-1] == 1:
        out_shape = out_shape[:-1]

    flat = labels.ravel()
    # Guard the reduction: max() of an empty array raises ValueError.
    num_classes = int(flat.max()) + 1 if flat.size else 0

    y_train_onehot = np.zeros((flat.size, num_classes), dtype=np.float32)
    y_train_onehot[np.arange(flat.size), flat] = 1.0
    return y_train_onehot.reshape(out_shape + (num_classes,))


# 制作图片数据和标签数据x_total, y_total
def create_labels_data(label_data_list):
    """Load all images of every labelled directory into parallel arrays.

    Args:
        label_data_list: iterable of ``(directory, label)`` pairs. Every
            readable image found directly inside ``directory`` is paired
            with ``label``.

    Returns:
        ``(x_total, y_total)``: NumPy arrays of the processed grayscale
        images and of their labels, in matching order. Both are empty when
        no image could be read.
    """
    images = []
    labels = []
    for filepath, label in label_data_list:
        # os.listdir gives no ordering guarantee; sorting keeps the sample
        # order (and therefore any seeded split made later) reproducible.
        for img in sorted(os.listdir(filepath)):
            img_path = os.path.join(filepath, img)
            image = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
            if image is None:
                # cv2.imread signals an unreadable entry (non-image file,
                # sub-directory, corrupt data) by returning None, not raising.
                continue
            # Pad the borders first, then scale to resize()'s default size.
            image = resize(getpaddingSize(image))
            images.append(image)
            labels.append(label)

    # Convert the Python lists to arrays.
    x_total = np.array(images)
    y_total = np.array(labels)
    return x_total, y_total


# 将人脸数据保存为json文件
def save_to_json(json_filename, face_data_dict, json_dir_path='json_file'):
    """Write ``face_data_dict`` as JSON to ``json_dir_path/json_filename``.

    Args:
        json_filename: name of the JSON file to create or overwrite.
        face_data_dict: JSON-serialisable object to store.
        json_dir_path: output directory; created, including any missing
            parent directories, when it does not exist. An empty string
            writes into the current working directory.
    """
    if json_dir_path:
        # makedirs handles nested paths and, with exist_ok=True, tolerates a
        # directory that already exists (no check-then-create race).
        os.makedirs(json_dir_path, exist_ok=True)
    json_file_path = os.path.join(json_dir_path, json_filename)
    with open(json_file_path, 'w') as f:
        json.dump(face_data_dict, f)


def create_data(path, json_filename):
    """Build a labelled face dataset from ``path`` and save it as JSON.

    Each sub-directory of ``path`` is treated as one class whose name is the
    directory name. The images are loaded with ``create_labels_data``, the
    class names are turned into one-hot labels, the samples are split into a
    training and a test set, and the result is written with ``save_to_json``
    under the keys ``x_train``, ``x_test``, ``y_train`` and ``y_test``.

    Args:
        path: directory containing one sub-directory of images per person.
        json_filename: name of the JSON file passed to ``save_to_json``.

    Raises:
        ValueError: if ``path`` contains no sub-directories.
    """
    # Map every label name to its image directory,
    # e.g. {'liqingze': 'face_data/liqingze'}.
    data_dict = {}
    for name in os.listdir(path):
        label_path = os.path.join(path, name)
        if os.path.isdir(label_path):
            data_dict[name] = label_path

    if not data_dict:
        raise ValueError(
            "no label sub-directories found in {!r}".format(path)
        )

    name_array = np.array(list(data_dict))

    # LabelEncoder maps each distinct name to an integer index:
    #   fit(y)               learn the name -> index mapping
    #   transform(y)         convert names to indices
    #   fit_transform(y)     fit, then transform, in one call
    #   inverse_transform(y) convert indices back to names
    label_encoder = LabelEncoder()
    y_label = label_encoder.fit_transform(name_array)

    # One one-hot row per class, in the same order as the names.
    y_label_onehot = label_to_onehot(y_label)
    # Pair every image directory with the one-hot label of its class.
    label_data_list = list(zip(data_dict.values(), y_label_onehot))

    # Load all images together with their labels.
    x_total, y_total = create_labels_data(label_data_list)

    # Split into training and test sets (no validation set is produced).
    #   test_size=0.2   a float is the fraction of samples held out for
    #                   testing; an int would be an absolute sample count.
    #   random_state=1  fixed seed, so the same split is produced on every
    #                   run; only random_state=None varies between runs.
    x_train, x_test, y_train, y_test = train_test_split(
        x_total, y_total, test_size=0.2, random_state=1
    )

    # NumPy arrays are not JSON-serialisable; store nested lists instead.
    face_data_dict = {
        'x_train': x_train.tolist(),
        'x_test': x_test.tolist(),
        'y_train': y_train.tolist(),
        'y_test': y_test.tolist(),
    }
    save_to_json(json_filename, face_data_dict)


if __name__ == '__main__':
    # Directory holding the collected faces, and the JSON file to produce.
    face_path, json_filename = "face_data", "face_data.json"
    # Label the collected face images and store the dataset as JSON.
    create_data(path=face_path, json_filename=json_filename)

前面我们收集的数据存放形式如下：
数据集

运行如上代码得到json格式数据集
json文件

face_data.json
x_train

y_train

读取json格式数据集

读取数据示例代码：

import json
import numpy as np


# 读取json文件中的人脸数据
def load_data(file):
    """Read a face dataset from a JSON file.

    Args:
        file: path of a JSON file containing the keys ``x_train``,
            ``x_test``, ``y_train`` and ``y_test``.

    Returns:
        ``((x_train, y_train), (x_test, y_test))`` with every element
        converted to a NumPy array.
    """
    with open(file, 'r') as f:
        data = json.load(f)

    x_train, x_test, y_train, y_test = (
        np.array(data[key])
        for key in ('x_train', 'x_test', 'y_train', 'y_test')
    )
    return (x_train, y_train), (x_test, y_test)


if __name__ == '__main__':
    # Load the stored dataset and unpack the training and test splits.
    file = 'json_file/face_data.json'
    (x_train, y_train), (x_test, y_test) = load_data(file=file)

新华小霸王

关注

0
点赞
踩
2

收藏

觉得还不错? 一键收藏
0
评论
制作人脸数据集

制作人脸数据集：生成json格式数据集；读取json格式数据集。在前面进行了人脸数据的采集后，我们需要将收集到的大量人脸数据进行汇总以及贴上标签。这里我会对收集到的灰度图像进行简单的处理，然后将图片数据以json格式保存到json文件中，方便后续对数据集的调用。create_data.py：import os、import cv2、import json、import num...
复制链接

扫一扫