首先，定义一个继承自 Dataset 的类：
from torch.utils.data import Dataset
import os
from PIL import Image
class MyData(Dataset):
    """Dataset over the images of one label folder.

    The images are expected to live in ``<root_dir>/<label_dir>/``; the
    folder name itself is used as the label of every sample.

    Attributes:
        root_dir: Directory that contains the per-label folders.
        label_dir: Name of the label folder (also returned as the label).
        path: ``os.path.join(root_dir, label_dir)``.
        img_path: Sorted list of the entry names found in ``path``.
    """

    def __init__(self, root_dir: str, label_dir: str) -> None:
        """Record the folder location and list its entries.

        Args:
            root_dir: Directory that contains the per-label folders.
            label_dir: Name of the folder holding this label's images.

        Raises:
            OSError: If the joined folder cannot be listed (for example it
                does not exist).
        """
        self.root_dir = root_dir
        self.label_dir = label_dir
        self.path = os.path.join(root_dir, label_dir)
        # os.listdir() returns entries in arbitrary order; sorting keeps the
        # index -> image mapping identical across runs and machines.
        self.img_path = sorted(os.listdir(self.path))

    def __getitem__(self, idx: int):
        """Return the ``(image, label)`` pair stored at position ``idx``.

        Args:
            idx: Position in ``img_path``.

        Returns:
            A tuple of the object returned by ``Image.open`` for that file
            and the label string (``label_dir``).

        Raises:
            IndexError: If ``idx`` is outside ``img_path``.
        """
        img_name = self.img_path[idx]
        # self.path already holds root_dir/label_dir, so reuse it.
        img_item_path = os.path.join(self.path, img_name)
        img = Image.open(img_item_path)
        label = self.label_dir
        return img, label

    def __len__(self) -> int:
        """Return the number of entries listed in the label folder."""
        return len(self.img_path)
然后，实例化这个类：
# Root of the training split; each class has its own sub-folder below it.
root_dir = 'dataset/train'
ants_label_dir, bees_label_dir = 'ants_image', 'bees_image'

# One dataset per class folder; the folder name doubles as the label.
ants_dataset = MyData(root_dir=root_dir, label_dir=ants_label_dir)
bees_dataset = MyData(root_dir=root_dir, label_dir=bees_label_dir)
先在 PyCharm 的控制台中运行一下，查看实例化后的对象都包含哪些属性：
可以看到，我们已经从给定的路径中提取到了想要的内容，即一个保存了该目录下所有图片文件名的列表（`img_path`）。接下来就可以对得到的 dataset 做进一步操作了。