Two Ways to Load Image Datasets in PyTorch

Preface

There are generally two ways to load an image dataset in PyTorch.
The first is to read the images with ImageFolder from torchvision.datasets and then load them in parallel with DataLoader; this suits image-classification problems and is simple but inflexible.
The second is to subclass torch.utils.data.Dataset to implement custom dataset reading, again loading with DataLoader; this is more flexible. Both are described below.
We use the hotdog dataset, which can be downloaded from here.

Using ImageFolder from torchvision.datasets

ImageFolder from torchvision.datasets requires the images to be arranged as shown below. The hotdog folder contains a train folder and a test folder, used for training and evaluation respectively; each of them in turn contains the two class folders hotdog and not-hotdog, and each class folder holds the image files.
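Based on that description, the expected layout is the following (ImageFolder maps the sorted class-folder names to integer labels, so hotdog becomes class 0 and not-hotdog class 1):

../data/hotdog/
├── train/
│   ├── hotdog/
│   └── not-hotdog/
└── test/
    ├── hotdog/
    └── not-hotdog/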
First, torchvision.transforms is used to crop the images to 224×224 and normalize them. ImageFolder's first argument is the path containing the class folders, and its second argument takes a torchvision.transforms transform; it returns the full image dataset.
Then DataLoader is used to load the dataset, with a configurable batch_size.

train_dir = "../data/hotdog/train"
test_dir = "../data/hotdog/test"

# Crop the images to 224×224 and normalize with the ImageNet statistics
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
train_augs = transforms.Compose([
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])
test_augs = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])
train_set = datasets.ImageFolder(train_dir, transform=train_augs)
test_set = datasets.ImageFolder(test_dir, transform=test_augs)

batch_size = 32
train_iter = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_iter = DataLoader(test_set, batch_size=batch_size)
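Since ImageFolder derives the labels from the sorted subfolder names, a quick sanity check of the dataset and of one loaded batch looks like this (expected output shown as comments, assuming the layout above):

print(train_set.classes)       # ['hotdog', 'not-hotdog']
print(train_set.class_to_idx)  # {'hotdog': 0, 'not-hotdog': 1}
X, y = next(iter(train_iter))
print(X.shape, y.shape)        # torch.Size([32, 3, 224, 224]) torch.Size([32])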

Randomly pick 9 images and visualize them:

import random
from matplotlib import pyplot as plt

def denorm(img):
    # undo the Normalize transform channel by channel, then clip to the valid [0, 1] range
    for i in range(img.shape[0]):
        img[i] = img[i] * std[i] + mean[i]
    return torch.clamp(img, 0., 1.)

plt.figure(figsize=(8, 8))
for i in range(9):
    # randint is inclusive on both ends, so the upper bound must be len - 1
    img, label = train_set[random.randint(0, len(train_set) - 1)]
    img = denorm(img)
    img = img.permute(1, 2, 0)  # (C, H, W) -> (H, W, C) for matplotlib
    ax = plt.subplot(3, 3, i + 1)
    ax.imshow(img.numpy())
    ax.set_title("label = %d" % label)
    ax.set_xticks([])
    ax.set_yticks([])
plt.show()

[Figure: 3×3 grid of randomly sampled training images with their labels]
Then define the training procedure:
In each epoch, iterate over the training set and the test set, calling net.train() and net.eval() respectively. On the training set, zero the gradients and run the forward + backward + optimizer steps; on the test set, wrap the evaluation in with torch.no_grad() to avoid computing gradients.

def train(net, train_iter, test_iter, criterion, optimizer, num_epochs):
    net = net.to(device)
    print("training on", device)
    for epoch in range(num_epochs):
        start = time.time()
        net.train()  # training mode
        train_loss_sum, train_acc_sum, n, batch_count = 0.0, 0.0, 0, 0
        for X, y in train_iter:
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()  # zero the gradients
            y_hat = net(X)
            loss = criterion(y_hat, y)
            loss.backward()
            optimizer.step()

            train_loss_sum += loss.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1

        with torch.no_grad():
            net.eval()  # evaluation mode
            test_acc_sum, n2 = 0.0, 0
            for X, y in test_iter:
                test_acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                n2 += y.shape[0]

        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_loss_sum / batch_count, train_acc_sum / n, test_acc_sum / n2, time.time() - start))

We fine-tune a pretrained ResNet-18: the pretrained parameters are updated with a small learning rate, while the randomly initialized parameters of the new fully connected layer are trained from scratch with a larger one (10x here). Finally, run the training:

pretrained_net = models.resnet18(pretrained=True)
num_ftrs = pretrained_net.fc.in_features
pretrained_net.fc = nn.Linear(num_ftrs, 2)

# separate the newly initialized fc parameters from the pretrained backbone parameters
output_params = list(map(id, pretrained_net.fc.parameters()))
feature_params = filter(lambda p: id(p) not in output_params, pretrained_net.parameters())
lr = 0.01
optimizer = optim.SGD([{'params': feature_params},
                       {'params': pretrained_net.fc.parameters(), 'lr': lr * 10}],  # 10x lr for the new fc layer
                      lr=lr, weight_decay=0.001)

loss = torch.nn.CrossEntropyLoss()
train(pretrained_net, train_iter, test_iter, loss, optimizer, num_epochs=5)
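A side note: models.resnet18(pretrained=True) works on older torchvision versions, but since torchvision 0.13 the pretrained argument is deprecated in favor of weights; the equivalent call there is:

pretrained_net = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)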

Training output:

training on cuda
epoch 1, loss 1.9044, train acc 0.793, test acc 0.815, time 25.9 sec
epoch 2, loss 0.4855, train acc 0.888, test acc 0.915, time 23.2 sec
epoch 3, loss 0.2449, train acc 0.923, test acc 0.938, time 17.3 sec
epoch 4, loss 0.2119, train acc 0.928, test acc 0.889, time 17.4 sec
epoch 5, loss 0.1601, train acc 0.939, test acc 0.925, time 17.3 sec

Complete code

import torch
from torch import nn, optim
from torch.utils.data import DataLoader
from torchvision import transforms, datasets, models
import time

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_dir = "../data/hotdog/train"
test_dir = "../data/hotdog/test"

# Crop the images to 224×224 and normalize with the ImageNet statistics
mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]
train_augs = transforms.Compose([
    transforms.RandomResizedCrop(size=224),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])
test_augs = transforms.Compose([
    transforms.Resize(size=256),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean, std)
])
train_set = datasets.ImageFolder(train_dir, transform=train_augs)
test_set = datasets.ImageFolder(test_dir, transform=test_augs)

batch_size = 32
train_iter = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_iter = DataLoader(test_set, batch_size=batch_size)

def train(net, train_iter, test_iter, criterion, optimizer, num_epochs):
    net = net.to(device)
    print("training on", device)
    for epoch in range(num_epochs):
        start = time.time()
        net.train()  # training mode
        train_loss_sum, train_acc_sum, n, batch_count = 0.0, 0.0, 0, 0
        for X, y in train_iter:
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()  # zero the gradients
            y_hat = net(X)
            loss = criterion(y_hat, y)
            loss.backward()
            optimizer.step()

            train_loss_sum += loss.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1

        with torch.no_grad():
            net.eval()  # evaluation mode
            test_acc_sum, n2 = 0.0, 0
            for X, y in test_iter:
                test_acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                n2 += y.shape[0]

        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_loss_sum / batch_count, train_acc_sum / n, test_acc_sum / n2, time.time() - start))


pretrained_net = models.resnet18(pretrained=True)
num_ftrs = pretrained_net.fc.in_features
pretrained_net.fc = nn.Linear(num_ftrs, 2)

output_params = list(map(id, pretrained_net.fc.parameters()))
feature_params = filter(lambda p: id(p) not in output_params, pretrained_net.parameters())
lr = 0.01
optimizer = optim.SGD([{'params': feature_params},
                       {'params': pretrained_net.fc.parameters(), 'lr': lr * 10}],
                      lr=lr, weight_decay=0.001)

loss = torch.nn.CrossEntropyLoss()
train(pretrained_net, train_iter, test_iter, loss, optimizer, num_epochs=5)

Using torch.utils.data.Dataset

To implement a custom dataset by subclassing torch.utils.data.Dataset, the __init__, __getitem__, and __len__ methods must be implemented.
In __init__, initialize the file paths or the list of file names so that __getitem__ can read from them later. Here we store the paths of all image samples in self.all_image_paths and the corresponding labels in self.all_image_labels, and reshape the mean and std values so they broadcast over the images.
In __getitem__, read the sample for a given index, preprocess it, and return a data pair such as (image, label). Here the image is read with OpenCV (which loads BGR, so it is converted to RGB), resized to 224×224, normalized, and transposed to the (C, H, W) channel layout that PyTorch expects; the (img, label) pair is returned.
In __len__, return the size of the whole dataset.

train_dir = "../data/hotdog/train"
test_dir = "../data/hotdog/test"

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

class Hotdog(Dataset):
    def __init__(self, path):
        data_root = pathlib.Path(path)
        all_image_paths = list(data_root.glob('*/*'))  # one subfolder per class
        self.all_image_paths = [str(path) for path in all_image_paths]
        # map the sorted class-folder names to integer labels, e.g. {'hotdog': 0, 'not-hotdog': 1}
        label_names = sorted(item.name for item in data_root.glob('*/') if item.is_dir())
        label_to_index = dict((label, index) for index, label in enumerate(label_names))
        self.all_image_labels = [label_to_index[path.parent.name] for path in all_image_paths]
        # reshape mean/std to (1, 1, 3) so they broadcast over (H, W, 3) images
        self.mean = np.array(mean).reshape((1, 1, 3))
        self.std = np.array(std).reshape((1, 1, 3))

    def __getitem__(self, index):
        img = cv.imread(self.all_image_paths[index])
        img = cv.cvtColor(img, cv.COLOR_BGR2RGB)  # OpenCV loads BGR; convert to RGB to match the ImageNet statistics
        img = cv.resize(img, (224, 224))
        img = img / 255.  # scale to [0, 1]
        img = (img - self.mean) / self.std  # normalize
        img = np.transpose(img, [2, 0, 1])  # (H, W, C) -> (C, H, W)
        label = self.all_image_labels[index]
        img = torch.tensor(img, dtype=torch.float32)
        label = torch.tensor(label)
        return img, label

    def __len__(self):
        return len(self.all_image_paths)

Next, instantiate the custom dataset and load it with DataLoader, again with a configurable batch_size.

train_set = Hotdog(train_dir)
test_set = Hotdog(test_dir)
print(len(train_set))  # 2000
print(len(test_set))   # 800

batch_size = 32
train_iter = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_iter = DataLoader(test_set, batch_size=batch_size)
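Note that DataLoader defaults to num_workers=0, i.e. batches are prepared in the main process. To actually load data in parallel, as mentioned above, worker processes can be spawned; a sketch:

train_iter = DataLoader(train_set, batch_size=batch_size, shuffle=True,
                        num_workers=4,    # 4 worker processes load batches in parallel
                        pin_memory=True)  # pin host memory to speed up transfers to the GPU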

Randomly pick 9 images and visualize them:

import random
from matplotlib import pyplot as plt

def denorm(img):
    # undo the Normalize step channel by channel, then clip to the valid [0, 1] range
    for i in range(img.shape[0]):
        img[i] = img[i] * std[i] + mean[i]
    img = torch.clamp(img, 0., 1.)
    return img

plt.figure(figsize=(8, 8))
for i in range(9):
    # randint is inclusive on both ends, so the upper bound must be len - 1
    img, label = train_set[random.randint(0, len(train_set) - 1)]
    img = denorm(img)
    img = img.permute(1, 2, 0)  # (C, H, W) -> (H, W, C) for matplotlib
    ax = plt.subplot(3, 3, i + 1)
    ax.imshow(img.numpy())  # __getitem__ already returns RGB, so no channel reversal is needed
    ax.set_title("label = %d" % label)
    ax.set_xticks([])
    ax.set_yticks([])
plt.show()

[Figure: 3×3 grid of randomly sampled training images with their labels]
Then fine-tune the pretrained ResNet-18 and run the training exactly as in the previous section, so that code is not repeated here.
Training output:

training on cuda
epoch 1, loss 1.5702, train acc 0.834, test acc 0.911, time 23.2 sec
epoch 2, loss 0.1694, train acc 0.958, test acc 0.939, time 19.1 sec
epoch 3, loss 0.0469, train acc 0.984, test acc 0.956, time 19.4 sec
epoch 4, loss 0.0163, train acc 0.998, test acc 0.956, time 19.3 sec
epoch 5, loss 0.0155, train acc 0.996, test acc 0.949, time 19.8 sec

Complete code

import torch
from torch import nn, optim
from torch.utils.data import Dataset, DataLoader
from torchvision import models
import pathlib
import numpy as np
import cv2 as cv
import time

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

train_dir = "../data/hotdog/train"
test_dir = "../data/hotdog/test"

mean = [0.485, 0.456, 0.406]
std = [0.229, 0.224, 0.225]

class Hotdog(Dataset):
    def __init__(self, path):
        data_root = pathlib.Path(path)
        all_image_paths = list(data_root.glob('*/*'))
        self.all_image_paths = [str(path) for path in all_image_paths]
        label_names = sorted(item.name for item in data_root.glob('*/') if item.is_dir())
        label_to_index = dict((label, index) for index, label in enumerate(label_names))
        self.all_image_labels = [label_to_index[path.parent.name] for path in all_image_paths]
        self.mean = np.array(mean).reshape((1, 1, 3))
        self.std = np.array(std).reshape((1, 1, 3))

    def __getitem__(self, index):
        img = cv.imread(self.all_image_paths[index])
        img = cv.cvtColor(img, cv.COLOR_BGR2RGB)  # OpenCV loads BGR; convert to RGB
        img = cv.resize(img, (224, 224))
        img = img / 255.
        img = (img - self.mean) / self.std
        img = np.transpose(img, [2, 0, 1])
        label = self.all_image_labels[index]
        img = torch.tensor(img, dtype=torch.float32)
        label = torch.tensor(label)
        return img, label

    def __len__(self):
        return len(self.all_image_paths)

train_set = Hotdog(train_dir)
test_set = Hotdog(test_dir)

batch_size = 32
train_iter = DataLoader(train_set, batch_size=batch_size, shuffle=True)
test_iter = DataLoader(test_set, batch_size=batch_size)

def train(net, train_iter, test_iter, criterion, optimizer, num_epochs):
    net = net.to(device)
    print("training on", device)
    for epoch in range(num_epochs):
        start = time.time()
        net.train()  # training mode
        train_loss_sum, train_acc_sum, n, batch_count = 0.0, 0.0, 0, 0
        for X, y in train_iter:
            X, y = X.to(device), y.to(device)
            optimizer.zero_grad()  # zero the gradients
            y_hat = net(X)
            loss = criterion(y_hat, y)
            loss.backward()
            optimizer.step()

            train_loss_sum += loss.cpu().item()
            train_acc_sum += (y_hat.argmax(dim=1) == y).sum().cpu().item()
            n += y.shape[0]
            batch_count += 1

        with torch.no_grad():
            net.eval()  # evaluation mode
            test_acc_sum, n2 = 0.0, 0
            for X, y in test_iter:
                test_acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                n2 += y.shape[0]

        print('epoch %d, loss %.4f, train acc %.3f, test acc %.3f, time %.1f sec'
              % (epoch + 1, train_loss_sum / batch_count, train_acc_sum / n, test_acc_sum / n2, time.time() - start))


pretrained_net = models.resnet18(pretrained=True)
num_ftrs = pretrained_net.fc.in_features
pretrained_net.fc = nn.Linear(num_ftrs, 2)

output_params = list(map(id, pretrained_net.fc.parameters()))
feature_params = filter(lambda p: id(p) not in output_params, pretrained_net.parameters())
lr = 0.01
optimizer = optim.SGD([{'params': feature_params},
                       {'params': pretrained_net.fc.parameters(), 'lr': lr * 10}],
                      lr=lr, weight_decay=0.001)

loss = torch.nn.CrossEntropyLoss()
train(pretrained_net, train_iter, test_iter, loss, optimizer, num_epochs=5)