自定义数据集的代码如下:
import os
import pandas as pd
from torchvision.io import read_image
class CustomImageDataset(Dataset):
def __init__(self, annotations_file, img_dir, transform=None, target_transform=None):
self.img_labels = pd.read_csv(annotations_file)
self.img_dir = img_dir
self.transform = transform
self.target_transform = target_transform
def __len__(self):
return len(self.img_labels)
def __getitem__(self, idx):
img_path = os.path.join(self.img_dir, self.img_labels.iloc[idx, 0])
image = read_image(img_path)
label = self.img_labels.iloc[idx, 1]
#如果需要transform。则这里传入class当中的transform函数进行transform
if self.transform:
image = self.transform(image)
#另一种transform
if self.target_transform:
label = self.target_transform(label)
#先返回每一张图片,然后再返回当前图片的label
return image, label
现在我们的自定义数据集即将做好了,然后使用dataloader模块打包数据集:
from torch.utils.data import DataLoader
train_dataloader = DataLoader(CustomImageDataset(annotations_file, img_dir, transform=None, target_transform=None), batch_size=64, shuffle=True)
test_dataloader = DataLoader(CustomImageDataset(annotations_file, img_dir, transform=None, target_transform=None) batch_size=64, shuffle=True)
接下来就可以开始训练啦!!!
train fuction的代码:
def train(dataloader, model, loss_fn, optimizer):
size = len(dataloader.dataset)
model.train()
for batch, (X, y) in enumerate(dataloader):
X, y = X.to(device), y.to(device)
# Compute prediction error
pred = model(X)
loss = loss_fn(pred, y)
# Backpropagation
optimizer.zero_grad()
loss.backward()
optimizer.step()
if batch % 100 == 0:
loss, current = loss.item(), batch * len(X)
print(f"loss: {loss:>7f} [{current:>5d}/{size:>5d}]")
test function的代码:
def test(dataloader, model, loss_fn):
size = len(dataloader.dataset)
num_batches = len(dataloader)
model.eval()
test_loss, correct = 0, 0
with torch.no_grad():
for X, y in dataloader:
X, y = X.to(device), y.to(device)
pred = model(X)
test_loss += loss_fn(pred, y).item()
correct += (pred.argmax(1) == y).type(torch.float).sum().item()
test_loss /= num_batches
correct /= size
print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")
一共使用5个epoch。因此代码如下:
epochs = 5
for t in range(epochs):
print(f"Epoch {t+1}\n-------------------------------")
train(train_dataloader, model, loss_fn, optimizer)
test(test_dataloader, model, loss_fn)
print("Done!")