目的:将自己的图片数据集转为PyTorch数据,即torch.utils.data.DataLoader数据类型,以便作为CNN的输入数据
准备条件
1.图片数据,主文件夹下的子文件夹为标记好的图片数据
比如这个数据集:indoorCVPR_09
2.安装opencv,pytorch,tqdm(这个比较简单,自行百度;例如:pip install opencv-python torch tqdm)
代码实现
import os
import cv2
import numpy as np
from tqdm import tqdm
import torch.utils.data as Data
import torch
# Hyperparameters
num_epochs = 100  # number of training passes
batch_size = 10  # mini-batch size handed to the DataLoaders built by read_image
lr = 0.01  # presumably the learning rate; not used in this snippet
# Fix the random seed so that every run starts from the same initial values.
torch.manual_seed(1)
def _load_gray_image(image_path, image_size):
    """Read one image as a normalized grayscale array, or None if unreadable.

    Returns an array of shape (1, image_size, image_size) with values in
    [0, 1], or None when OpenCV cannot decode the file.
    """
    img = cv2.imread(image_path, cv2.IMREAD_COLOR)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising
        # (non-image files, corrupt files, unsupported formats).
        return None
    img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    img = cv2.resize(img, (image_size, image_size))
    # Add a leading channel axis: (H, W) -> (1, H, W), then scale to [0, 1].
    return img[np.newaxis, :, :] / 255


def read_image(path, per_class_num=120, test_ratio=0.3, image_size=256):
    """Load a folder-per-class image dataset into train/test DataLoaders.

    Every sub-directory of ``path`` is treated as one class; classes are
    labelled 0, 1, 2, ... in sorted directory-name order. Images are read
    as grayscale, resized to ``image_size`` x ``image_size`` and scaled to
    [0, 1].

    Args:
        path: Root directory whose sub-directories each hold one class.
        per_class_num: Maximum number of images loaded per class.
        test_ratio: Fraction of each class's loaded images put in the test
            set (the first ones in sorted file-name order).
        image_size: Side length in pixels of the resized square images.

    Returns:
        A tuple ``(train_loader, test_loader, n_class)``. The loaders yield
        ``(images, labels)`` batches of the module-level ``batch_size``;
        images have shape (batch, 1, image_size, image_size) and dtype
        float64 (call ``.float()`` before feeding a float32 model), labels
        are integer class indices (not one-hot). ``n_class`` is the number
        of class directories found.
    """
    train_x, train_y = [], []
    test_x, test_y = [], []
    n_class = 0

    # os.listdir order is arbitrary; sorting keeps the class -> label mapping
    # and the train/test split reproducible across runs and platforms.
    for child_dir in sorted(os.listdir(path)):
        child_path = os.path.join(path, child_dir)
        if not os.path.isdir(child_path):
            # Stray files in the root directory are not classes.
            continue
        print(child_path)

        class_imgs = []
        for dir_image in tqdm(sorted(os.listdir(child_path))):
            if len(class_imgs) >= per_class_num:
                break
            img = _load_gray_image(os.path.join(child_path, dir_image),
                                   image_size)
            if img is None:
                # Skip unreadable files without counting them.
                continue
            class_imgs.append(img)

        # Split on the number of images actually loaded, so classes smaller
        # than per_class_num still get a test_ratio split instead of having
        # nearly everything land in the test set.
        n_test = int(round(test_ratio * len(class_imgs)))
        n_train = len(class_imgs) - n_test
        test_x.extend(class_imgs[:n_test])
        test_y.extend([n_class] * n_test)
        train_x.extend(class_imgs[n_test:])
        train_y.extend([n_class] * n_train)
        n_class += 1

    # Convert to PyTorch data. Packing the list into a single ndarray first
    # avoids the very slow element-wise conversion of a list of ndarrays.
    train_dataset = Data.TensorDataset(torch.tensor(np.array(train_x)),
                                       torch.tensor(train_y))
    train_loader = Data.DataLoader(dataset=train_dataset,
                                   batch_size=batch_size,
                                   shuffle=True)

    test_dataset = Data.TensorDataset(torch.tensor(np.array(test_x)),
                                      torch.tensor(test_y))
    test_loader = Data.DataLoader(dataset=test_dataset,
                                  batch_size=batch_size,
                                  shuffle=False)
    return train_loader, test_loader, n_class
调用时直接把路径传进去就行
# Pass the dataset root directory; read_image treats each sub-folder as a class.
train_loader, test_loader, n_class = read_image(path='D:\\indoorCVPR_09\\images')
如果有用,请随手点赞!