config.py
定义了一些超参数和模型的架构信息,例如设备类型、随机种子、学习率、训练轮数、批次大小和类别数量
import torch
# Device: prefer the GPU when CUDA is available, otherwise fall back to CPU.
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Hyperparameters
RANDOM_SEED = 1        # seed passed to torch.manual_seed for reproducibility
LEARNING_RATE = 0.001  # Adam learning rate
NUM_EPOCHS = 3         # number of full passes over the training set
BATCH_SIZE = 128       # minibatch size used by every DataLoader
# Architecture
NUM_CLASSES = 2        # binary classification: cat vs dog
utils.py
包含了一些辅助函数,如计算数据集大小、展示图像、移动图像等
import os
import numpy as np
import torch
import torch.nn.functional as F
from torch.utils.data import Dataset
from torch.utils.data import DataLoader
from torchvision import transforms
from PIL import Image
import matplotlib.pyplot as plt
def calc_PicNum():  # calc tot_pic number
    """Count the cat and dog images in the training folder.

    Scans dogs-vs-cats/train once (the original listed the directory twice),
    counts the .jpg files whose names start with 'cat' or 'dog', prints both
    counts and returns them.

    Returns:
        tuple: (number of cat images, number of dog images).
    """
    train_dir = os.path.join('dogs-vs-cats', 'train')
    num_train_cats = 0
    num_train_dogs = 0
    # Single directory scan instead of two separate os.listdir() passes.
    for fname in os.listdir(train_dir):
        if not fname.endswith('.jpg'):
            continue
        if fname.startswith('cat'):
            num_train_cats += 1
        elif fname.startswith('dog'):
            num_train_dogs += 1
    print(f'Training set cats: {num_train_cats}')
    print(f'Training set dogs: {num_train_dogs}')
    return num_train_cats, num_train_dogs
def show_Onepic(picName):  # show
    """Open one image from the training folder and display it with matplotlib."""
    picture = Image.open(os.path.join('dogs-vs-cats', 'train', picName))
    plt.imshow(picture)
    plt.show()
def move_pic():
    """Split the Kaggle training folder into train/valid/test subsets.

    Creates dogs-vs-cats/test and dogs-vs-cats/valid if needed, then moves
    every training .jpg whose numeric id is > 11249 into test/ and every id
    in 10000..11249 into valid/; ids <= 9999 stay in train/.
    """
    # exist_ok avoids the check-then-mkdir race of the original
    # `if not os.path.exists(...): os.mkdir(...)` pattern.
    os.makedirs(os.path.join('dogs-vs-cats', 'test'), exist_ok=True)
    os.makedirs(os.path.join('dogs-vs-cats', 'valid'), exist_ok=True)
    for fname in os.listdir(os.path.join('dogs-vs-cats', 'train')):
        if not fname.endswith('.jpg'):
            continue
        # File names look like 'cat.123.jpg'; the middle token is the image id.
        _, img_num, _ = fname.split('.')
        filepath = os.path.join('dogs-vs-cats', 'train', fname)
        img_num = int(img_num)
        if img_num > 11249:
            os.rename(filepath, filepath.replace('train', 'test'))
        elif img_num > 9999:
            os.rename(filepath, filepath.replace('train', 'valid'))
class CatsDogsDataset(Dataset):
    """Image dataset whose labels come from the file-name prefix.

    Files named 'cat.*' get label 0; everything else gets label 1.
    """

    def __init__(self, img_dir, transform=None):
        self.img_dir = img_dir
        # Keep only the .jpg files found in the directory.
        self.img_names = [name for name in os.listdir(img_dir)
                          if name.endswith('.jpg')]
        # Derive the label from the first dot-separated token of the name.
        self.y = [0 if name.split('.')[0] == 'cat' else 1
                  for name in self.img_names]
        self.transform = transform

    def __getitem__(self, index):
        """Return the (optionally transformed) image and its label at `index`."""
        image = Image.open(os.path.join(self.img_dir, self.img_names[index]))
        if self.transform is not None:
            image = self.transform(image)
        return image, self.y[index]

    def __len__(self):
        """Number of samples in the dataset."""
        return len(self.y)
def calc_MeanAndStd():
    """Compute per-channel mean and std of the training images.

    The returned tensors feed transforms.Normalize in the data pipeline.
    NOTE: this averages per-batch statistics, which is an approximation
    when batch sizes differ (behavior kept from the original).
    """
    resize_to_tensor = transforms.Compose([transforms.Resize([64, 64]),
                                           transforms.ToTensor()])
    train_dataset = CatsDogsDataset(img_dir=os.path.join('dogs-vs-cats', 'train'),
                                    transform=resize_to_tensor)
    train_loader = DataLoader(dataset=train_dataset,
                              batch_size=5000,
                              shuffle=False)
    batch_means = []
    batch_stds = []
    # Channel-wise statistics per batch (reduce over batch, height, width).
    for images, _ in train_loader:
        pixels = images.numpy()
        batch_means.append(np.mean(pixels, axis=(0, 2, 3)))
        batch_stds.append(np.std(pixels, axis=(0, 2, 3)))
    train_mean = torch.tensor(np.mean(batch_means, axis=0))
    train_std = torch.tensor(np.mean(batch_stds, axis=0))
    return train_mean, train_std
def compute_accuracy_and_loss(model, data_loader, device):
    """Evaluate accuracy (%) and mean per-example cross-entropy on a loader.

    Args:
        model: module/callable whose forward returns (logits, probas).
        data_loader: iterable of (features, targets) batches.
        device: device the batches are moved to.

    Returns:
        tuple: (accuracy_percent as a float tensor, mean cross-entropy float).
    """
    correct_pred, num_examples = 0, 0
    cross_entropy = 0.
    for features, targets in data_loader:
        features = features.to(device)
        targets = targets.to(device)
        logits, probas = model(features)
        # Fix: sum (not batch-mean) the loss so dividing by num_examples
        # below yields the true per-example mean. The original accumulated
        # per-batch means and divided by the example count, understating
        # the loss by roughly a factor of the batch size.
        cross_entropy += F.cross_entropy(logits, targets, reduction='sum').item()
        # Predicted class = index of the highest probability per sample.
        _, predicted_labels = torch.max(probas, 1)
        num_examples += targets.size(0)
        correct_pred += (predicted_labels == targets).sum()
    return correct_pred.float()/num_examples * 100, cross_entropy/num_examples
data_loader.py
负责加载数据集并对数据进行预处理,然后创建训练集、验证集和测试集的数据加载器
from config import *
from utils import *
import os
from torch.utils.data import DataLoader
from torchvision import transforms
# Per-channel statistics of the training set, used by transforms.Normalize.
train_mean, train_std = calc_MeanAndStd()  # used for data standardization
# train_mean = [0.4875, 0.4544, 0.4164]
# train_std = [0.2521, 0.2453, 0.2481]
data_transforms = {
    'train': transforms.Compose([
        transforms.RandomRotation(5),  # randomly rotate within -5..+5 degrees
        transforms.RandomHorizontalFlip(),  # randomly flip horizontally
        transforms.RandomResizedCrop(64, scale=(0.96, 1.0), ratio=(0.95, 1.05)),  # random crop/resize to 64x64 (scale 0.96-1.0, aspect ratio 0.95-1.05)
        transforms.ToTensor(),  # convert the PIL image to a tensor
        transforms.Normalize(train_mean, train_std)  # per channel: subtract mean, divide by std
    ]),
    # Deterministic pipeline (no augmentation) for evaluation.
    'valid': transforms.Compose([
        transforms.Resize([64, 64]),
        transforms.ToTensor(),
        transforms.Normalize(train_mean, train_std)
    ])
}
train_dataset = CatsDogsDataset(img_dir=os.path.join('dogs-vs-cats', 'train'),
                                transform=data_transforms['train'])
valid_dataset = CatsDogsDataset(img_dir=os.path.join('dogs-vs-cats', 'valid'),
                                transform=data_transforms['valid'])
# The test set reuses the deterministic 'valid' transform.
test_dataset = CatsDogsDataset(img_dir=os.path.join('dogs-vs-cats', 'test'),
                               transform=data_transforms['valid'])
train_loader = DataLoader(dataset=train_dataset,
                          batch_size=BATCH_SIZE,
                          drop_last=True,  # drop the final batch if smaller than BATCH_SIZE
                          shuffle=True)
valid_loader = DataLoader(dataset=valid_dataset,
                          batch_size=BATCH_SIZE,
                          shuffle=False)
test_loader = DataLoader(dataset=test_dataset,
                         batch_size=BATCH_SIZE,
                         shuffle=False)
model.py
定义了VGG16,CNN模型的架构
import torch
import torch.nn.functional as F
import torch.nn as nn
class VGG16(nn.Module):
    """VGG-style convolutional network returning (logits, probas).

    Five conv blocks (2, 2, 4, 4, 4 conv layers), each ending in 2x2
    max-pooling, so a 64x64 input reaches the classifier as a
    512 x 2 x 2 feature map.

    Args:
        num_classes: number of output classes.
    """

    def __init__(self, num_classes):
        super(VGG16, self).__init__()
        # calculate same padding:
        # (w - k + 2*p)/s + 1 = o
        # => p = (s(o-1) - w + k)/2  -> p = 1 for k=3, s=1
        # The repeated hand-written Sequential blocks of the original are
        # built by one helper; module ordering (and hence state-dict keys
        # like layer_1.0.weight) is unchanged.
        self.layer_1 = self._conv_block(3, 64, num_convs=2)
        self.layer_2 = self._conv_block(64, 128, num_convs=2)
        self.layer_3 = self._conv_block(128, 256, num_convs=4)
        self.layer_4 = self._conv_block(256, 512, num_convs=4)
        self.layer_5 = self._conv_block(512, 512, num_convs=4)
        self.classifier = nn.Sequential(
            nn.Linear(512*2*2, 4096),
            nn.ReLU(),
            nn.Linear(4096, 4096),
            nn.ReLU(),
            nn.Linear(4096, num_classes)
        )
        self._init_weights()

    @staticmethod
    def _conv_block(in_channels, out_channels, num_convs):
        """Build `num_convs` (3x3 Conv2d + ReLU) pairs followed by a 2x2 max-pool."""
        layers = []
        channels = in_channels
        for _ in range(num_convs):
            layers.append(nn.Conv2d(in_channels=channels,
                                    out_channels=out_channels,
                                    kernel_size=(3, 3),
                                    stride=(1, 1),
                                    padding=1))
            layers.append(nn.ReLU())
            channels = out_channels
        layers.append(nn.MaxPool2d(kernel_size=(2, 2),
                                   stride=(2, 2)))
        return nn.Sequential(*layers)

    def _init_weights(self):
        """Initialize conv/linear weights ~ N(0, 0.05) and zero all biases."""
        for m in self.modules():
            if isinstance(m, (nn.Conv2d, nn.Linear)):
                m.weight.detach().normal_(0, 0.05)
                # Fix: the original called .detach().detach() on Linear
                # biases and skipped the bias-is-None guard that the
                # Conv2d branch had.
                if m.bias is not None:
                    m.bias.detach().zero_()

    def forward(self, x):
        """Run the network on a (N, 3, 64, 64) batch.

        Returns:
            tuple: (logits, probas) — raw scores and their softmax.
        """
        x = self.layer_1(x)
        x = self.layer_2(x)
        x = self.layer_3(x)
        x = self.layer_4(x)
        x = self.layer_5(x)
        # Flatten by batch dimension instead of hard-coding -1 first,
        # which keeps the batch size explicit.
        logits = self.classifier(x.view(x.size(0), -1))
        probas = F.softmax(logits, dim=1)
        return logits, probas
class CNN(nn.Module):
    """Small 3-block CNN for 64x64 RGB images, returning (logits, probas).

    Each block is Conv -> BatchNorm -> ReLU -> 2x2 max-pool, halving the
    spatial size, so a 64x64 input reaches the classifier as 512 x 8 x 8.

    Args:
        num_classes: number of output classes.
    """

    def __init__(self, num_classes):
        super(CNN, self).__init__()
        # Block 1: 3 -> 128 channels, 64x64 -> 32x32.
        self.layer1 = nn.Sequential(
            nn.Conv2d(3, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2)
        )
        # Block 2: 128 -> 256 channels, 32x32 -> 16x16.
        self.layer2 = nn.Sequential(
            nn.Conv2d(128, 256, kernel_size=3, padding=1),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2)
        )
        # Block 3: 256 -> 512 channels, 16x16 -> 8x8.
        self.layer3 = nn.Sequential(
            nn.Conv2d(256, 512, kernel_size=3, padding=1),
            nn.BatchNorm2d(512),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2)
        )
        # Three stride-2 pools shrink each spatial dim by 2**3 = 8, so a
        # 64x64 input flattens to 512 * 8 * 8 features (the original
        # comment claimed a 2**6 = 64 reduction, which is incorrect).
        self.fc1 = nn.Linear(512 * 8 * 8, 2048)
        self.dropout = nn.Dropout()
        self.fc2 = nn.Linear(2048, num_classes)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return (logits, probas) for a (N, 3, 64, 64) batch."""
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = x.view(x.size(0), -1)
        x = self.relu(self.fc1(x))
        # Fix: self.dropout was constructed but never applied; use it
        # between the hidden layer and the classifier as intended
        # (no-op in eval mode, so inference behavior is unchanged).
        x = self.dropout(x)
        logits = self.fc2(x)
        probas = F.softmax(logits, dim=1)
        return logits, probas
train.py
定义了训练模型的函数。它迭代训练数据集,进行前向传播、反向传播和优化器更新。在每个训练轮次结束后,使用验证集进行模型的评估,并记录训练和验证的准确率和损失。最后,绘制训练和验证的准确率和损失曲线
from data_loader import *
import time
import torch
import torch.nn.functional as F
import matplotlib.pyplot as plt
def train(model, optimizer):
    """Train `model` for NUM_EPOCHS epochs and plot loss/accuracy curves.

    Uses the module-level train_loader/valid_loader and DEVICE from
    data_loader/config. After each epoch the model is evaluated on both
    sets; the collected curves are plotted once training finishes.

    Args:
        model: network whose forward returns (logits, probas).
        optimizer: optimizer constructed over model.parameters().
    """
    start_time = time.time()
    train_acc_lst, valid_acc_lst = [], []  # per-epoch curve points
    train_loss_lst, valid_loss_lst = [], []
    for epoch in range(NUM_EPOCHS):
        model.train()
        for batch_idx, (features, targets) in enumerate(train_loader):
            # PREPARE MINIBATCH
            features = features.to(DEVICE)
            targets = targets.to(DEVICE)
            # FORWARD AND BACK PROP
            logits, probas = model(features)
            cost = F.cross_entropy(logits, targets)
            optimizer.zero_grad()
            cost.backward()
            optimizer.step()
            # LOGGING
            if not batch_idx % 120:
                print(f'Epoch: {epoch + 1:03d}/{NUM_EPOCHS:03d} | '
                      f'Batch {batch_idx:03d}/{len(train_loader):03d} |'
                      f' Cost: {cost:.4f}')
        model.eval()
        with torch.no_grad():  # idiomatic form of set_grad_enabled(False)
            train_acc, train_loss = compute_accuracy_and_loss(model, train_loader, device=DEVICE)
            valid_acc, valid_loss = compute_accuracy_and_loss(model, valid_loader, device=DEVICE)
            # Fix: store plain floats — matplotlib cannot plot CUDA tensors,
            # so appending the raw accuracy tensors breaks plotting on GPU.
            train_acc_lst.append(train_acc.item())
            valid_acc_lst.append(valid_acc.item())
            train_loss_lst.append(train_loss)
            valid_loss_lst.append(valid_loss)
        print(f'Epoch: {epoch + 1:03d}/{NUM_EPOCHS:03d} Train Acc.: {train_acc:.2f}%'
              f' | Validation Acc.: {valid_acc:.2f}%')
        elapsed = (time.time() - start_time)/60
        print(f'Time elapsed: {elapsed:.2f} min')
    elapsed = (time.time() - start_time)/60
    print(f'Total Training Time: {elapsed:.2f} min')
    # visualize loss
    plt.plot(range(1, NUM_EPOCHS + 1), train_loss_lst, label='Training loss')
    plt.plot(range(1, NUM_EPOCHS + 1), valid_loss_lst, label='Validation loss')
    plt.legend(loc='upper right')
    plt.ylabel('Cross entropy')
    plt.xlabel('Epoch')
    plt.show()
    # visualize accuracy
    plt.plot(range(1, NUM_EPOCHS + 1), train_acc_lst, label='Training accuracy')
    plt.plot(range(1, NUM_EPOCHS + 1), valid_acc_lst, label='Validation accuracy')
    plt.legend(loc='upper left')
    # Fix: this is the accuracy plot; the original reused the loss label.
    plt.ylabel('Accuracy (%)')
    plt.xlabel('Epoch')
    plt.show()
val.py
包含了评估模型和展示结果的函数。它首先将模型设置为评估模式,然后计算测试集上的准确率和损失。接下来,使用训练集的图像数据进行预测,并将预测结果与真实标签进行比较,然后将预测结果展示出来。
from data_loader import *
import numpy as np
import torch
from torch.utils.data import DataLoader
import matplotlib.pyplot as plt
class UnNormalize(object):
    """Invert torchvision-style normalization on an image tensor."""

    def __init__(self, mean, std):
        self.mean = mean
        self.std = std

    def __call__(self, tensor):
        """Undo per-channel normalization in place.

        Args:
            tensor: image tensor of size (C, H, W) that was normalized
                per channel as (t - mean) / std.

        Returns:
            The same tensor, with original pixel values restored.
        """
        for channel, (m, s) in zip(tensor, zip(self.mean, self.std)):
            # Reverse of the normalize step t.sub_(m).div_(s).
            channel.mul_(s).add_(m)
        return tensor
def evaluation_and_show(model, test_loader):
    """Report test-set accuracy and visualize predictions on 8 test images.

    Args:
        model: network whose forward returns (logits, probas).
        test_loader: DataLoader over the test set; used both for the
            accuracy computation and for the prediction preview.
    """
    model.eval()
    with torch.no_grad():
        test_acc, test_loss = compute_accuracy_and_loss(model, test_loader, DEVICE)
        print(f'Test accuracy: {test_acc:.2f}')
        unorm = UnNormalize(mean=train_mean, std=train_std)
        # Fix: the original rebound `test_loader` to a new DataLoader over
        # train_dataset, so the "test" preview actually showed training
        # images. Take the first batch of the loader that was passed in.
        for features, targets in test_loader:
            break
        # Call the module (not .forward()) so hooks would still fire.
        _, probas = model(features[:8].to(DEVICE))
        # Predicted class = index of the highest probability per sample.
        predictions = torch.argmax(probas, dim=1)
        d = {0: 'cat', 1: 'dog'}
        fig, ax = plt.subplots(1, 8, figsize=(20, 10))  # 1 row x 8 columns
        for i in range(8):
            img = unorm(features[i])  # restore pixel values for display
            ax[i].imshow(np.transpose(img, (1, 2, 0)))
            ax[i].set_xlabel(d[predictions[i].item()])
        plt.show()
main.py
# Entry-point script: prepare the data, build the model, train, then evaluate.
print("🚀 Importing required files...")
from model import *
from val import *
from train import *
print("Loading down!\n")
# Report how many cat/dog images are in the training folder.
print("Calculating dataset size ...")
calc_PicNum()
print("Calculation completed!\n")
# Sanity check: display one sample training image.
show_Onepic(picName='cat.100.jpg')
print("Moving 2500 images from the training folder into a test set & val folder...")
move_pic()
print("Moving completed!\n")
device = torch.device(DEVICE)
torch.manual_seed(RANDOM_SEED)  # reproducibility of weight init / shuffling
print("Building model...")
# model = VGG16(num_classes=NUM_CLASSES)
# model = model.to(device)
model2 = CNN(num_classes=NUM_CLASSES)
model2 = model2.to(device)
print("Successfully build!\n")
# optimizer = torch.optim.Adam(model.parameters(), lr=LEARNING_RATE)
optimizer = torch.optim.Adam(model2.parameters(), lr=LEARNING_RATE)
print("🚀Training on ", device)
# train(model=model, optimizer=optimizer)
train(model=model2, optimizer=optimizer)
print("Training completed!\n")
print("Evaluating...")
# evaluation_and_show(model=model, test_loader=test_loader)
evaluation_and_show(model=model2, test_loader=test_loader)