将自己的数据集制作成形如CIFAR10形式

最新推荐文章于 2023-04-11 18:25:42 发布

Hedwig_Peng

最新推荐文章于 2023-04-11 18:25:42 发布

阅读量1.9k

点赞数 1

文章标签： python CIFAR deep learning

本文链接：https://blog.csdn.net/yimodanfeng/article/details/103229019

版权

在深度学习中，遇到了cifar10图像数据集经典好用。萌发了将其他数据也制作成类似的形式的想法。
ModelNet40是普林斯顿提供的3D数据集。有学者提出了多视角的数据结构。
在这里插入图片描述
上图为图片名字属性，下方为代码。

"""
This file is to read Multi-view data for ModelNet40
One-view
"""
'''
 40 class = ['airplane', 'bathtub', 'bed', 'bench', 'bookshelf', 'bottle', 'bowl',
             'car', 'chair', 'cone, 'cup', 'curtain', 'desk', 'door', 'dresser', 
             'flower_pot', 'glass_box', 'guitar', 'keyboard', 'lamp', 'laptop',
             'mantel', 'moniter', 'night_stand', 'person', 'piano', 'plant', 'radio',
             'range_hood', 'sink', 'sofa', 'stairs', 'stool', 'table', 'tent', 'toilet',
             'tv_stand', 'vase', 'wardrobe', 'xbox'];
'''

import os
import cv2
import numpy as np
import pickle

# 数据集所在的地址
main_dir = "D:/dataset/ModelNet/modelnet40v1"
# 存储地址
save_dir = "D:/dataset/ModelNet"

# 创建空余的存储
traindata =[[]]
trainlabel = []
##trainname = []
testdata = [[]]
testlabel = []
#testname = []

# 第一类数据
i = 0
print("the numer of class", i)
# os.listdir(main_dir) 获取 main_dir 的所有文件
for main_name in os.listdir(main_dir):
    #print(main_name)  40类，每一类的文件夹
    # 将每一类添加到路径中，然后遍历子文件夹
    sub_dir = os.path.join(main_dir, main_name)
    # print(sub_dir)
    # 每一类下面的两个子文件夹： 'train' 和 'test'
    for sub_name in os.listdir(sub_dir):
        sub_sub_dir = os.path.join(sub_dir,sub_name)
        # 遍历训练子集下所有的图像，并且生成数据
        if sub_name == 'train':
        #print(sub_sub_dir)
            count_train = 0
            for sub_sub_name in os.listdir(sub_sub_dir):
                view = sub_sub_name.split('_')
                '''
                因为有些类名，经过分割后会分成四项，因此不同于其他的项
                '''
                len_view = len(view)
                if len_view == 3:
                    view2 = view[2]
                    if view2 == '001.jpg':
                        img_dir = os.path.join(sub_sub_dir,sub_sub_name)
                        print(img_dir)
                        img1 = cv2.imread(img_dir,cv2.IMREAD_GRAYSCALE)
                        #img1 = img1.reshape(1,50176)
                        img1 = img1.reshape([50176])
                        img1 = np.array(img1)
                        if traindata == [[]]:
                            traindata = [img1]
                        else:
                            traindata = np.concatenate((traindata, [img1]), axis = 0)
                        count_train = count_train + 1
                        cv2.destroyAllWindows() 

                else:
                    view3 = view[3]
                    if view3 == '001.jpg':
                        img_dir = os.path.join(sub_sub_dir,sub_sub_name)
                        print(img_dir)
                        img1 = cv2.imread(img_dir,cv2.IMREAD_GRAYSCALE)
                        #img1 = img1.reshape(1,50176)
                        img1 = img1.reshape([50176])
                        img1 = np.array(img1)
                        if traindata == [[]]:
                            traindata = [img1]
                        else:
                            traindata = np.concatenate((traindata, [img1]), axis = 0)
                        count_train = count_train + 1
                        cv2.destroyAllWindows() 
        # 遍历测试子集下所有的图像，并且生成数据
        if sub_name == 'test':
        #print(sub_sub_dir)
            count_test = 0
            for sub_sub_name in os.listdir(sub_sub_dir):
                view = sub_sub_name.split('_')
                '''
                因为有些类名，经过分割后会分成四项，因此不同于其他的项
                '''
                len_view = len(view)
                if len_view == 3:
                    view2 = view[2]
                    if view2 == '001.jpg':
                        img_dir = os.path.join(sub_sub_dir,sub_sub_name)
                        print("The dir of img", img_dir)
                        img1 = cv2.imread(img_dir,cv2.IMREAD_GRAYSCALE)
                        #img1 = img1.reshape(1,50176)
                        img1 = img1.reshape([50176])
                        img1 = np.array(img1)
                        if testdata == [[]]:
                            testdata = [img1]
                        else:
                            testdata = np.concatenate((testdata, [img1]), axis = 0)
                        count_test = count_test + 1
                else:
                    view3 = view[3]
                    if view3 == '001.jpg':
                        img_dir = os.path.join(sub_sub_dir,sub_sub_name)
                        print("The dir of img", img_dir)
                        img1 = cv2.imread(img_dir,cv2.IMREAD_GRAYSCALE)
                        #img1 = img1.reshape(1,50176)
                        img1 = img1.reshape([50176])
                        img1 = np.array(img1)
                        if testdata == [[]]:
                            testdata = [img1]
                        else:
                            testdata = np.concatenate((testdata, [img1]), axis = 0)
                        count_test = count_test + 1
        # 遍历测试子集下所有的图像，并且生成数据
    '''
    训练集标签
    '''
    trainlabels = np.zeros((count_train,))
    if trainlabel == []:
        trainlabel = trainlabels
    else:
        trainlabels[:] = i
        trainlabel = np.append(trainlabel, trainlabels)
    
    '''
    测试集标签
    '''
    testlabels = np.zeros((count_test,))
    if testlabel == []:
        testlabel = testlabels
    else:
        testlabels[:] = i
        testlabel = np.append(testlabel, testlabels)

    i = i + 1

    print("the numer of class", i)
# 数据类型转换，将数据转换成整型
trainlabel = trainlabel.astype(np.int32)
testlabel = testlabel.astype(np.int32)
#np.save(save_dir + 'train.npy', traindata)
# 创建字典来进行保存
train_dict = {'data': traindata, 'label': trainlabel}
test_dict = {'data': testdata, 'label': testlabel}

f_train = open('D:/dataset/ModelNet/data/train','wb')
f_test = open('D:/dataset/ModelNet/data/test','wb')

pickle.dump(train_dict, f_train)
pickle.dump(test_dict, f_test)

参考
深度学习，制作类似cifar10图像数据集

Hedwig_Peng

关注

1
点赞
踩
12

收藏

觉得还不错? 一键收藏
2
评论
将自己的数据集制作成形如CIFAR10形式

在深度学习中，遇到了cifar10图像数据集经典好用。萌发了将其他数据也制作成类似的形式的想法。ModelNet40是普林斯顿提供的3D数据集。有学者提出了多视角的数据结构。"""This file is to read Multi-view data for ModelNet40One-view"""''' 40 class = ['airplane', 'bathtub', 'b...
复制链接

扫一扫