PyTorch(torchvision.datasets)提供了 ImageFolder 类,用于加载按如下目录结构组织的图片数据集(每个子文件夹的名称即为类别标签):
root/dog/xxx.png
root/dog/xxy.png
root/dog/xxz.png
root/cat/123.png
root/cat/nsdf3.png
root/cat/asd932_.png
这个类的问题在于它本身不提供把数据划分为训练集(training dataset)和验证集(validation dataset)的功能。为此,我写了下面两个类来完成划分和加载。
import os
import torch
from torch.utils.data import Dataset, DataLoader
from torchvision.transforms import ToTensor, Resize, Compose
from PIL import Image
from sklearn.model_selection import train_test_split
class ImageFolderSplitter:
# images should be placed in folders like:
# --root
# ----root\dogs
# ----root\dogs\image1.png
# ----root\dogs\image2.png
# ----root\cats
# ----root\cats\image1.png
# ----root\cats\image2.png
# path: the root of the image folder
def __init__(self, path, train_size = 0.8):
self.path = path
self.train_size = train_size
self.class2