文章目录
VGG16识别猫狗
导入常用的库函数
import time
import os
import numpy as np
import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt

# Run on GPU when available, otherwise fall back to CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
获取数据
Cats vs Dogs Dataset
下载链接
https://www.kaggle.com/c/dogs-vs-cats/data
在Kaggle平台下进行解压
!unzip ../input/dogs-vs-cats/test1.zip
!unzip ../input/dogs-vs-cats/train.zip
dataset文件夹由两个子文件夹train和test1组成,分别包含jpg格式的训练图像和测试图像。请注意,测试集示例未标记。
# Count training images per class. The Kaggle Dogs vs. Cats archive encodes
# the label in the filename prefix: "cat.<n>.jpg" vs "dog.<n>.jpg".
# (The duplicate `import os` from the original notebook cell is dropped;
# os is already imported at the top of the file. The directory is also
# listed only once instead of twice.)
train_img_dir = os.path.join('dogs-vs-cats', 'train')
train_filenames = os.listdir(train_img_dir)
num_train_cats = sum(1 for f in train_filenames
                     if f.endswith('.jpg') and f.startswith('cat'))
num_train_dogs = sum(1 for f in train_filenames
                     if f.endswith('.jpg') and f.startswith('dog'))
print(f'Training set cats: {num_train_cats}')
print(f'Training set dogs: {num_train_dogs}')
Training set cats: 12500
Training set dogs: 12500
查看数据
# Sanity-check the data: open one training image, print its array shape
# (height, width, channels) and display it.
sample_path = os.path.join('./', 'train', 'cat.59.jpg')
img = Image.open(sample_path)
print(np.asarray(img, dtype=np.uint8).shape)
plt.imshow(img)
创建验证和测试子集
将2500个图像从训练文件夹移动到测试集文件夹中。
将2500张图像从训练文件夹移动到验证集文件夹中。
# Carve validation and test subsets out of the training folder. Per class,
# images numbered 10000-11249 move to valid/ and 11250-12499 move to test/
# (2500 images each), leaving 10000 cats + 10000 dogs for training.
for subset in ('test', 'valid'):
    # makedirs(exist_ok=True) avoids the check-then-create race of the
    # original exists()/mkdir() pair and is idempotent on re-runs.
    os.makedirs(os.path.join('./', subset), exist_ok=True)

train_dir = os.path.join('./', 'train')
for fname in os.listdir(train_dir):
    if not fname.endswith('.jpg'):
        continue
    # Filenames look like "cat.123.jpg" / "dog.123.jpg".
    _, img_num, _ = fname.split('.')
    img_num = int(img_num)
    if img_num > 11249:
        dest = 'test'
    elif img_num > 9999:
        dest = 'valid'
    else:
        continue
    # Build the destination path explicitly rather than str.replace on the
    # whole path, which would also rewrite any other 'train' substring.
    os.rename(os.path.join(train_dir, fname),
              os.path.join('./', dest, fname))
标准化图像
class CatsDogsDataset(Dataset):
    """Custom Dataset for the Kaggle Dogs vs. Cats images.

    Labels are derived from the filename prefix: files named
    ``cat.*.jpg`` get label 0, every other ``*.jpg`` file
    (i.e. ``dog.*.jpg``) gets label 1.
    (The original docstring wrongly said "CelebA face images".)
    """

    def __init__(self, img_dir, transform=None):
        """
        Args:
            img_dir: directory containing the ``.jpg`` images.
            transform: optional torchvision transform applied per image.
        """
        self.img_dir = img_dir
        # Only .jpg files belong to the dataset; ignore anything else.
        self.img_names = [f for f in os.listdir(img_dir)
                          if f.endswith('.jpg')]
        # 0 = cat, 1 = dog, decided by the filename prefix.
        self.y = [0 if name.split('.')[0] == 'cat' else 1
                  for name in self.img_names]
        self.transform = transform

    def __getitem__(self, index):
        """Return (image, label) for the given index."""
        img = Image.open(os.path.join(self.img_dir,
                                      self.img_names[index]))
        # Force 3 channels: a few images in this Kaggle set are
        # grayscale/CMYK and would otherwise yield tensors with the
        # wrong channel count after ToTensor().
        img = img.convert('RGB')
        if self.transform is not None:
            img = self.transform(img)
        return img, self.y[index]

    def __len__(self):
        return len(self.y)
# Compute per-channel mean/std of the (resized) training images so inputs
# can be normalized. Per-batch statistics are averaged; with 20000 training
# images and batch_size=5000 every batch carries equal weight.
custom_transform1 = transforms.Compose([transforms.Resize([64, 64]),
                                        transforms.ToTensor()])
train_dataset = CatsDogsDataset(img_dir=os.path.join('./', 'train'),
                                transform=custom_transform1)
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=5000,
                          shuffle=False)

train_mean = []
train_std = []
# Unpack (images, labels) directly instead of the original
# `enumerate(...)` with an unused index and `image[0]` indexing.
for images, _ in train_loader:
    batch = images.numpy()
    # Reduce over batch, height and width -> one value per channel.
    train_mean.append(np.mean(batch, axis=(0, 2, 3)))
    train_std.append(np.std(batch, axis=(0, 2, 3)))
train_mean = torch.tensor(np.mean(train_mean, axis=0))
train_std = torch.tensor(np.mean(train_std, axis=0))
print('Mean:', train_mean)
print('Std Dev:', train_std)
Mean: tensor([0.4875, 0.4544, 0.4164])
Std Dev: tensor([0.2521, 0.2453, 0.2481])
创建数据集
BATCH_SIZE = 128

# Augment only the training split; validation/test just resize + normalize.
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomRotation(5),
        transforms.RandomHorizontalFlip(),
        transforms.RandomResizedCrop(64, scale=(0.96, 1.0), ratio=(0.95, 1.05)),
        transforms.ToTensor(),
        transforms.Normalize(train_mean, train_std),
    ]),
    'valid': transforms.Compose([
        transforms.Resize([64, 64]),
        transforms.ToTensor(),
        transforms.Normalize(train_mean, train_std),
    ]),
}

# The test split reuses the deterministic 'valid' pipeline.
train_dataset = CatsDogsDataset(img_dir=os.path.join('./', 'train'),
                                transform=data_transforms['train'])
valid_dataset = CatsDogsDataset(img_dir=os.path.join('./', 'valid'),
                                transform=data_transforms['valid'])
test_dataset = CatsDogsDataset(img_dir=os.path.join('./', 'test'),
                               transform=data_transforms['valid'])

# drop_last keeps every training batch the same size.
train_loader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE,
                          drop_last=True, shuffle=True)
valid_loader = DataLoader(dataset=valid_dataset, batch_size=BATCH_SIZE,
                          shuffle=False)
test_loader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE,
                         shuffle=False)
创建模型
class VGG16(torch.nn.Module):
    """VGG-16-style convolutional classifier for 64x64 RGB inputs.

    Five conv blocks of 3x3 "same" convolutions, each ending in 2x2
    max-pooling, halve the resolution 64 -> 32 -> 16 -> 8 -> 4 -> 2,
    so the classifier sees 512*2*2 features. Note: blocks 3-5 use four
    convolutions each (as the original code did), one more than the
    canonical VGG-16 configuration.

    forward() returns ``(logits, probas)`` where ``probas`` is the
    softmax of the logits.
    """

    def __init__(self, num_classes):
        """
        Args:
            num_classes: size of the output layer.
        """
        super(VGG16, self).__init__()
        # Same padding for 3x3 stride-1 convs: p = (s(o-1) - w + k)/2 = 1.
        # Attribute names and layer ordering are kept identical to the
        # original so saved state_dicts remain loadable.
        self.block_1 = self._make_block(3, 64, num_convs=2)
        self.block_2 = self._make_block(64, 128, num_convs=2)
        self.block_3 = self._make_block(128, 256, num_convs=4)
        self.block_4 = self._make_block(256, 512, num_convs=4)
        self.block_5 = self._make_block(512, 512, num_convs=4)
        self.classifier = nn.Sequential(
            nn.Linear(512 * 2 * 2, 4096),
            nn.ReLU(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, num_classes)
        )
        # Initialize weights from N(0, 0.05) and zero the biases.
        # (Fixes the original's redundant `.detach().detach()` on Linear
        # biases and unifies the Conv2d/Linear branches.)
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                m.weight.detach().normal_(0, 0.05)
                if m.bias is not None:
                    m.bias.detach().zero_()

    @staticmethod
    def _make_block(in_channels, out_channels, num_convs):
        """Build [Conv2d -> ReLU] * num_convs followed by 2x2 max-pooling."""
        layers = []
        channels = in_channels
        for _ in range(num_convs):
            layers.append(nn.Conv2d(in_channels=channels,
                                    out_channels=out_channels,
                                    kernel_size=(3, 3),
                                    stride=(1, 1),
                                    padding=1))
            layers.append(nn.ReLU())
            channels = out_channels
        layers.append(nn.MaxPool2d(kernel_size=(2, 2), stride=(2, 2)))
        return nn.Sequential(*layers)

    def forward(self, x):
        """Return (logits, probas) for a batch of 3x64x64 images."""
        x = self.block_1(x)
        x = self.block_2(x)
        x = self.block_3(x)
        x = self.block_4(x)
        x = self.block_5(x)
        logits = self.classifier(x.view(-1, 512 * 2 * 2))
        probas = F.softmax(logits, dim=1)
        return logits, probas
# Training hyperparameters and model/optimizer construction.
NUM_EPOCHS = 8

# nn.Module.to() moves in place and returns self, so the chain is
# equivalent to the original two-step assignment.
model = VGG16(num_classes=2).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)
训练
开启训练
def compute_accuracy_and_loss(model, data_loader, device):
    """Evaluate *model* over *data_loader*.

    The model must return ``(logits, probas)`` as VGG16 does here.

    Returns:
        (accuracy, avg_loss): accuracy in percent (0-100, a tensor) and
        the average cross-entropy per example (a float).
    """
    correct_pred, num_examples = 0, 0
    cross_entropy = 0.
    for features, targets in data_loader:
        features = features.to(device)
        targets = targets.to(device)
        logits, probas = model(features)
        # Sum per batch, divide once by the total example count.
        # The original summed per-batch *means* and then divided by the
        # number of examples, which both shrinks the value by ~batch_size
        # and weights a smaller final batch incorrectly.
        cross_entropy += F.cross_entropy(logits, targets,
                                         reduction='sum').item()
        _, predicted_labels = torch.max(probas, 1)
        num_examples += targets.size(0)
        correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float() / num_examples * 100, cross_entropy / num_examples
start_time = time.time()
# Per-epoch history, consumed by the loss/accuracy plots further below.
train_acc_lst, valid_acc_lst = [], []
train_loss_lst, valid_loss_lst = [], []
for epoch in range(NUM_EPOCHS):
    model.train()
    for batch_idx, (features, targets) in enumerate(train_loader):
        ### PREPARE MINIBATCH
        features = features.to(DEVICE)
        targets = targets.to(DEVICE)
        ### FORWARD AND BACK PROP
        # Only the logits feed the loss; probas are ignored here.
        logits, probas = model(features)
        cost = F.cross_entropy(logits, targets)
        optimizer.zero_grad()
        cost.backward()
        ### UPDATE MODEL PARAMETERS
        optimizer.step()
        ### LOGGING
        # Print progress every 120 mini-batches (including batch 0).
        if not batch_idx % 120:
            print (f'Epoch: {epoch+1:03d}/{NUM_EPOCHS:03d} | '
                   f'Batch {batch_idx:03d}/{len(train_loader):03d} |'
                   f' Cost: {cost:.4f}')
    # no need to build the computation graph for backprop when computing accuracy
    model.eval()
    with torch.set_grad_enabled(False):
        # NOTE(review): evaluating on the full (augmented) train_loader each
        # epoch is expensive; acceptable here since there are only 8 epochs.
        train_acc, train_loss = compute_accuracy_and_loss(model, train_loader, device=DEVICE)
        valid_acc, valid_loss = compute_accuracy_and_loss(model, valid_loader, device=DEVICE)
        train_acc_lst.append(train_acc)
        valid_acc_lst.append(valid_acc)
        train_loss_lst.append(train_loss)
        valid_loss_lst.append(valid_loss)
        print(f'Epoch: {epoch+1:03d}/{NUM_EPOCHS:03d} Train Acc.: {train_acc:.2f}%'
              f' | Validation Acc.: {valid_acc:.2f}%')
    # Cumulative wall-clock time since training started, in minutes.
    elapsed = (time.time() - start_time)/60
    print(f'Time elapsed: {elapsed:.2f} min')

elapsed = (time.time() - start_time)/60
print(f'Total Training Time: {elapsed:.2f} min')
Epoch: 001/008 | Batch 000/156 | Cost: 15317.9863
Epoch: 001/008 | Batch 120/156 | Cost: 0.6863
Epoch: 001/008 Train Acc.: 62.80% | Validation Acc.: 62.36%
Time elapsed: 3.15 min
Epoch: 002/008 | Batch 000/156 | Cost: 0.6732
Epoch: 002/008 | Batch 120/156 | Cost: 0.6635
Epoch: 002/008 Train Acc.: 63.87% | Validation Acc.: 64.56%
Time elapsed: 6.31 min
Epoch: 003/008 | Batch 000/156 | Cost: 0.5801
Epoch: 003/008 | Batch 120/156 | Cost: 0.6056
Epoch: 003/008 Train Acc.: 66.84% | Validation Acc.: 66.48%
Time elapsed: 9.47 min
Epoch: 004/008 | Batch 000/156 | Cost: 0.6102
Epoch: 004/008 | Batch 120/156 | Cost: 0.6219
Epoch: 004/008 Train Acc.: 68.42% | Validation Acc.: 67.92%
Time elapsed: 12.60 min
Epoch: 005/008 | Batch 000/156 | Cost: 0.6080
Epoch: 005/008 | Batch 120/156 | Cost: 0.6027
Epoch: 005/008 Train Acc.: 68.54% | Validation Acc.: 67.44%
Time elapsed: 15.69 min
Epoch: 006/008 | Batch 000/156 | Cost: 0.5779
Epoch: 006/008 | Batch 120/156 | Cost: 0.6334
Epoch: 006/008 Train Acc.: 68.95% | Validation Acc.: 68.16%
Time elapsed: 18.80 min
Epoch: 007/008 | Batch 000/156 | Cost: 0.6451
Epoch: 007/008 | Batch 120/156 | Cost: 0.6107
Epoch: 007/008 Train Acc.: 69.16% | Validation Acc.: 67.68%
Time elapsed: 21.92 min
Epoch: 008/008 | Batch 000/156 | Cost: 0.5551
Epoch: 008/008 | Batch 120/156 | Cost: 0.5993
Epoch: 008/008 Train Acc.: 70.10% | Validation Acc.: 69.44%
Time elapsed: 25.06 min
Total Training Time: 25.06 min
训练损失和验证损失关系图
# Plot training vs. validation cross-entropy per epoch.
epochs = range(1, NUM_EPOCHS + 1)
plt.plot(epochs, train_loss_lst, label='Training loss')
plt.plot(epochs, valid_loss_lst, label='Validation loss')
plt.legend(loc='upper right')
plt.ylabel('Cross entropy')
plt.xlabel('Epoch')
plt.show()
训练精度和验证精度关系图
# Plot training vs. validation accuracy per epoch.
plt.plot(range(1, NUM_EPOCHS+1), train_acc_lst, label='Training accuracy')
plt.plot(range(1, NUM_EPOCHS+1), valid_acc_lst, label='Validation accuracy')
plt.legend(loc='upper left')
# Fixed: this is the accuracy plot; the y-axis was mislabeled 'Cross entropy'.
plt.ylabel('Accuracy (%)')
plt.xlabel('Epoch')
plt.show()
保存权重
# Persist only the learned parameters (state_dict), not the full module
# object, so loading is robust to code changes.
PATH = "./my_net.pth"
torch.save(model.state_dict(), PATH)