将自己的数据集制作成形如CIFAR10形式

在深度学习中,CIFAR-10图像数据集经典且好用,于是萌发了将其他数据集也制作成类似形式的想法。
ModelNet40是普林斯顿提供的3D数据集。有学者提出了多视角的数据结构。
在这里插入图片描述
上图为图片名字属性,下方为代码。

"""
Read the multi-view ModelNet40 images and pack them into CIFAR-10-style
pickle files, keeping one view per model.
"""
'''
 40 class = ['airplane', 'bathtub', 'bed', 'bench', 'bookshelf', 'bottle', 'bowl',
             'car', 'chair', 'cone', 'cup', 'curtain', 'desk', 'door', 'dresser',
             'flower_pot', 'glass_box', 'guitar', 'keyboard', 'lamp', 'laptop',
             'mantel', 'monitor', 'night_stand', 'person', 'piano', 'plant', 'radio',
             'range_hood', 'sink', 'sofa', 'stairs', 'stool', 'table', 'tent', 'toilet',
             'tv_stand', 'vase', 'wardrobe', 'xbox'];
'''

import os
import cv2
import numpy as np
import pickle

# Root directory of the dataset; the script walks
# <main_dir>/<class>/{train,test}/<image files>.
main_dir = "D:/dataset/ModelNet/modelnet40v1"
# Directory under which the pickled splits are stored (in a "data" sub-folder).
save_dir = "D:/dataset/ModelNet"

# Last "_"-separated token of the file names that are kept (one view per model).
FIRST_VIEW_TOKEN = '001.jpg'
# Length of one flattened grayscale image (presumably 224 x 224 renderings --
# TODO confirm against the dataset).
IMAGE_PIXELS = 50176


def is_first_view(file_name):
    """Return True when *file_name* names the '001' view of a model.

    Names look like ``<class>_<id>_<view>.jpg``.  Class names may themselves
    contain an underscore (e.g. ``glass_box``), so the view token is taken
    from the end of the name instead of from a fixed position.  Names with
    fewer than three "_"-separated parts (stray files) are rejected instead
    of raising IndexError.
    """
    parts = file_name.split('_')
    return len(parts) >= 3 and parts[-1] == FIRST_VIEW_TOKEN


def read_gray_flat(img_path):
    """Read *img_path* as grayscale and return it as a 1-D array.

    Raises:
        IOError: if OpenCV cannot read the file (``cv2.imread`` returns None
            instead of raising).
        ValueError: if the image does not contain IMAGE_PIXELS pixels.
    """
    img = cv2.imread(img_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        raise IOError("cannot read image: {}".format(img_path))
    if img.size != IMAGE_PIXELS:
        raise ValueError(
            "unexpected image size {} (expected {}): {}".format(
                img.size, IMAGE_PIXELS, img_path))
    return img.reshape(IMAGE_PIXELS)


def load_split(split_dir, log_prefix=()):
    """Return the flattened first-view images found directly in *split_dir*.

    A missing directory yields an empty list, so a class without a 'train'
    or 'test' folder simply contributes no samples.  Each image path is
    printed (preceded by the items of *log_prefix*) before it is read.
    """
    if not os.path.isdir(split_dir):
        return []
    rows = []
    # Sorted so the sample order does not depend on the file system.
    for file_name in sorted(os.listdir(split_dir)):
        if not is_first_view(file_name):
            continue
        img_path = os.path.join(split_dir, file_name)
        print(*(tuple(log_prefix) + (img_path,)))
        rows.append(read_gray_flat(img_path))
    return rows


def stack_rows(rows):
    """Stack 1-D image rows into an (n_samples, IMAGE_PIXELS) array.

    Rows are collected in a list and stacked once; concatenating after every
    image copies the whole matrix each time.
    """
    if not rows:
        return np.empty((0, IMAGE_PIXELS), dtype=np.uint8)
    return np.stack(rows, axis=0)


def make_labels(counts):
    """Return int32 labels where class k is repeated ``counts[k]`` times."""
    repeats = np.asarray(counts, dtype=np.intp)
    return np.repeat(np.arange(len(repeats), dtype=np.int32), repeats)


def build_dataset(root_dir):
    """Collect the first-view images of every class folder under *root_dir*.

    Class folders are visited in sorted order and labelled 0, 1, 2, ... by
    position, so the label assignment is reproducible.  Entries of *root_dir*
    that are not directories are ignored.

    Returns:
        (traindata, trainlabel, testdata, testlabel): the data arrays have
        one flattened image per row, the label arrays are int32.
    """
    class_names = sorted(
        name for name in os.listdir(root_dir)
        if os.path.isdir(os.path.join(root_dir, name))
    )

    train_rows, test_rows = [], []
    train_counts, test_counts = [], []

    print("the numer of class", 0)
    for class_index, class_name in enumerate(class_names):
        class_dir = os.path.join(root_dir, class_name)

        class_train = load_split(os.path.join(class_dir, 'train'))
        class_test = load_split(os.path.join(class_dir, 'test'),
                                ("The dir of img",))

        train_rows.extend(class_train)
        test_rows.extend(class_test)
        train_counts.append(len(class_train))
        test_counts.append(len(class_test))

        print("the numer of class", class_index + 1)

    return (stack_rows(train_rows), make_labels(train_counts),
            stack_rows(test_rows), make_labels(test_counts))


def save_pickle(obj, path):
    """Pickle *obj* to *path*, closing the file even if dumping fails."""
    with open(path, 'wb') as handle:
        pickle.dump(obj, handle)


if __name__ == "__main__":
    traindata, trainlabel, testdata, testlabel = build_dataset(main_dir)

    # Dictionaries in the same spirit as the CIFAR-10 batches.
    train_dict = {'data': traindata, 'label': trainlabel}
    test_dict = {'data': testdata, 'label': testlabel}

    # Create the output folder up front so the run cannot fail at the very
    # end because of a missing directory.
    out_dir = os.path.join(save_dir, 'data')
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    save_pickle(train_dict, os.path.join(out_dir, 'train'))
    save_pickle(test_dict, os.path.join(out_dir, 'test'))

参考
深度学习,制作类似cifar10图像数据集

  • 1
    点赞
  • 12
    收藏
    觉得还不错? 一键收藏
  • 2
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 2
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值