Hung-yi Lee's Machine Learning 2022, Homework 3: food image classification notes

import numpy as np
import pandas as pd
import torch
import os
import torch.nn as nn
import torchvision.transforms as transforms
from PIL import Image
# "ConcatDataset" and "Subset" are possibly useful when doing semi-supervised learning.
from torch.utils.data import ConcatDataset, DataLoader, Subset, Dataset
from torchvision.datasets import DatasetFolder, VisionDataset

# This is for the progress bar.
from tqdm.auto import tqdm
import random

_exp_name = "sample"

# Fix random seeds for reproducibility
myseed = 6666  # set a random seed for reproducibility
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False
np.random.seed(myseed)
torch.manual_seed(myseed)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(myseed)
    
    
# Transforms
# Normally, we don't need augmentations in testing and validation.
# All we need here is to resize the PIL image and transform it into Tensor.
test_tfm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor(),
])

# However, it is also possible to use augmentation in the testing phase.
# You may use train_tfm to produce a variety of images and then test using ensemble methods
# (a test-time-augmentation sketch is given after the prediction loop at the end of this note).
train_tfm = transforms.Compose([
    # Resize the image into a fixed shape (height = width = 128)
    transforms.Resize((128, 128)),
    # You may add some transforms here (an augmented example pipeline is sketched right after this block).
    # ToTensor() should be the last one of the transforms.
    transforms.ToTensor(),
])
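# The "add some transforms here" note above refers to data augmentation. Below is a sketch of a
# richer training pipeline; the specific transforms and parameter values are illustrative choices,
# not part of the original code. Swapping it in for train_tfm is the intended usage.
train_tfm_aug = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.RandomHorizontalFlip(p=0.5),                                # mirror images left-right
    transforms.RandomRotation(15),                                         # rotate by up to +/-15 degrees
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2),  # perturb colors slightly
    transforms.ToTensor(),                                                 # ToTensor() stays last
])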

# Dataset
class FoodDataset(Dataset):

    def __init__(self, path, tfm=test_tfm, files=None):
        super().__init__()
        self.path = path
        # # Side note: the two constructions are equivalent (first time I've seen the
        # # comprehension form, good to know). Sanity check:
        # a = sorted([os.path.join(path, x) for x in os.listdir(path) if x.endswith(".jpg")])
        # b = []
        # for x in os.listdir(path):
        #     if x.endswith(".jpg"):
        #         b.append(os.path.join(path, x))
        # b = sorted(b)
        # assert a == b
        self.files = sorted([os.path.join(path, x) for x in os.listdir(path) if x.endswith(".jpg")])
        if files is not None:
            self.files = files
        print(f"One {path} sample", self.files[0])
        self.transform = tfm
  
    def __len__(self):
        return len(self.files)
    # Paths are stored in a list; __getitem__ loads the image at the given index
    # and derives its label from the filename.
    def __getitem__(self, idx):
        fname = self.files[idx]
        # Load and transform the image
        im = Image.open(fname)
        im = self.transform(im)
        # im: [3, 128, 128]
        try:
            # Food-11 filenames look like "<label>_<id>.jpg", e.g. "3_100.jpg" -> label 3
            label = int(os.path.basename(fname).split("_")[0])
        except ValueError:
            label = -1  # the test set has no labels
        return im, label
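# Quick usage example (illustrative; assumes the Food-11 data is unpacked under ./food11,
# the same layout the training code below expects):
# demo_set = FoodDataset("./food11/validation", tfm=test_tfm)
# im, label = demo_set[0]
# print(im.shape, label)    # expected: torch.Size([3, 128, 128]) and an integer class id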
    
    
class Classifier(nn.Module):
    def __init__(self):
        super(Classifier, self).__init__()
        # torch.nn.Conv2d(in_channels, out_channels, kernel_size, stride, padding)
        # torch.nn.MaxPool2d(kernel_size, stride, padding)
        # input dimensions: [3, 128, 128]
        self.cnn = nn.Sequential(
            # Conv2d arguments: in_channels, out_channels, kernel_size, stride, padding (rings of zeros added around the input)
            nn.Conv2d(3, 64, 3, 1, 1),  # [64, 128, 128]
            nn.BatchNorm2d(64),         # argument: number of channels
            nn.ReLU(),                  # activation after each conv layer
            # MaxPool2d arguments: kernel_size, stride, padding (same meaning as above)
            nn.MaxPool2d(2, 2, 0),      # [64, 64, 64]

            nn.Conv2d(64, 128, 3, 1, 1), # [128, 64, 64]
            nn.BatchNorm2d(128),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [128, 32, 32]

            nn.Conv2d(128, 256, 3, 1, 1), # [256, 32, 32]
            nn.BatchNorm2d(256),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),      # [256, 16, 16]

            nn.Conv2d(256, 512, 3, 1, 1), # [512, 16, 16]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 8, 8]
            
            nn.Conv2d(512, 512, 3, 1, 1), # [512, 8, 8]
            nn.BatchNorm2d(512),
            nn.ReLU(),
            nn.MaxPool2d(2, 2, 0),       # [512, 4, 4]
        )
        self.fc = nn.Sequential(
            nn.Linear(512*4*4, 1024),
            nn.ReLU(),
            nn.Linear(1024, 512),
            nn.ReLU(),
            nn.Linear(512, 11)
            # The loss is cross-entropy, which applies softmax internally, so no softmax layer is added here.
        )

    def forward(self, x):
        out = self.cnn(x)
        # out has shape [batch_size, 512, 4, 4]; out.size()[0] is the batch size
        out = out.view(out.size()[0], -1)
        # view() reshapes the tensor without changing its data: dim 0 stays the batch size and
        # -1 flattens the remaining 512*4*4 values into one dimension, e.g. torch.Size([64, 8192])
        return self.fc(out)
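# Shape sanity check (illustrative only; the _check_model/_dummy names exist just for this check):
# push a dummy batch through the CNN part to confirm the feature map flattens to 512*4*4 = 8192
# values per image, matching nn.Linear(512*4*4, 1024) above.
_check_model = Classifier()
_dummy = torch.randn(2, 3, 128, 128)                     # a fake batch of two RGB 128x128 images
_feat = _check_model.cnn(_dummy)                         # -> [2, 512, 4, 4]
print(_feat.shape, _feat.view(_feat.size(0), -1).shape)  # torch.Size([2, 512, 4, 4]) torch.Size([2, 8192])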
    
batch_size = 64
_dataset_dir = "./food11"
# Construct the training / validation datasets and dataloaders.
train_set = FoodDataset(os.path.join(_dataset_dir,"training"), tfm=train_tfm)
train_loader = DataLoader(train_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
valid_set = FoodDataset(os.path.join(_dataset_dir,"validation"), tfm=test_tfm)
valid_loader = DataLoader(valid_set, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
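# The import note above mentions ConcatDataset / Subset for semi-supervised learning.
# A minimal pseudo-labelling sketch (not part of the baseline code): `unlabeled_set` is assumed to be
# a FoodDataset over an unlabeled folder, `model` a (partially) trained classifier, and the 0.65
# confidence threshold is an illustrative choice.
class PseudoDataset(Dataset):
    """Wraps a Subset of unlabeled images together with their pseudo labels."""
    def __init__(self, subset, labels):
        self.subset, self.labels = subset, labels
    def __len__(self):
        return len(self.subset)
    def __getitem__(self, idx):
        im, _ = self.subset[idx]   # discard the dummy -1 label from the unlabeled set
        return im, self.labels[idx]

def get_pseudo_labels(unlabeled_set, model, threshold=0.65):
    loader = DataLoader(unlabeled_set, batch_size=batch_size, shuffle=False)
    model.eval()
    keep, labels = [], []
    with torch.no_grad():
        for i, (imgs, _) in enumerate(loader):
            probs = torch.softmax(model(imgs.to(device)), dim=-1)
            max_probs, preds = probs.max(dim=-1)
            for j in range(len(imgs)):
                if max_probs[j] >= threshold:        # keep only confident predictions
                    keep.append(i * batch_size + j)
                    labels.append(preds[j].item())
    model.train()
    return PseudoDataset(Subset(unlabeled_set, keep), labels)

# Inside the training loop one could then rebuild the loader each epoch, e.g.:
#   pseudo_set = get_pseudo_labels(unlabeled_set, model)
#   train_loader = DataLoader(ConcatDataset([train_set, pseudo_set]), batch_size=batch_size,
#                             shuffle=True, num_workers=0, pin_memory=True)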


# "cuda" only when GPUs are available.
device = "cuda" if torch.cuda.is_available() else "cpu"

# The number of training epochs and patience.
n_epochs = 20
# Early stopping: whenever validation accuracy improves, save the model and reset the counter;
# otherwise increment it, and stop training once it reaches `patience` (same idea as in earlier homeworks).
patience = 300 # If no improvement in 'patience' epochs, early stop

# Initialize a model, and put it on the device specified.
model = Classifier().to(device)

# For the classification task, we use cross-entropy as the measurement of performance.
# Loss function
criterion = nn.CrossEntropyLoss()

# Initialize optimizer, you may fine-tune some hyperparameters such as learning rate on your own.
# Optimizer
optimizer = torch.optim.Adam(model.parameters(), lr=0.0003, weight_decay=1e-5) 
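# Optional tweak (not part of the baseline code): a learning-rate schedule often helps here.
# A cosine-annealing sketch over the training epochs; it only takes effect if scheduler.step()
# is added once per epoch at the end of the training loop below.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=n_epochs)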

# Initialize trackers, these are not parameters and should not be changed
stale = 0
best_acc = 0

for epoch in range(n_epochs):

    # ---------- Training ----------
    # Make sure the model is in train mode before training.
    model.train()

    # These are used to record information in training.
    train_loss = []
    train_accs = []

    for batch in tqdm(train_loader):  # tqdm wraps the dataloader to show a progress bar

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        # Shapes: imgs: [64, 3, 128, 128], labels: [64]
        
        #imgs = imgs.half()
        #print(imgs.shape,labels.shape)

        # Forward the data. (Make sure data and model are on the same device.)
        logits = model(imgs.to(device))

        # Calculate the cross-entropy loss.
        # We don't need to apply softmax before computing cross-entropy as it is done automatically.
        loss = criterion(logits, labels.to(device))

        # Gradients stored in the parameters in the previous step should be cleared out first.
        optimizer.zero_grad()

        # Compute the gradients for parameters.
        loss.backward()

        # Clip the gradient norms for stable training.
        grad_norm = nn.utils.clip_grad_norm_(model.parameters(), max_norm=10)  # clip the gradient norm to avoid exploding gradients

        # Update the parameters with computed gradients.
        optimizer.step()

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()
        # argmax(dim=-1) picks the index of the largest value along the last dimension: the fc output is
        # [64, 11] logits, and since softmax (applied inside cross-entropy) is monotonic, the largest logit's
        # index is the predicted class. Comparing with the labels and taking .mean() gives the batch accuracy.

        # Record the loss and accuracy.
        train_loss.append(loss.item())
        train_accs.append(acc)
        # append the current batch's accuracy to the list
        
    train_loss = sum(train_loss) / len(train_loss)
    train_acc = sum(train_accs) / len(train_accs)  # average over all batches in the epoch

    # Print the information.
    print(f"[ Train | {epoch + 1:03d}/{n_epochs:03d} ] loss = {train_loss:.5f}, acc = {train_acc:.5f}")

    # ---------- Validation ----------
    # Make sure the model is in eval mode so that some modules like dropout are disabled and work normally.
    model.eval()

    # These are used to record information in validation.
    valid_loss = []
    valid_accs = []

    # Iterate the validation set by batches.
    for batch in tqdm(valid_loader):

        # A batch consists of image data and corresponding labels.
        imgs, labels = batch
        #imgs = imgs.half()

        # We don't need gradient in validation.
        # Using torch.no_grad() accelerates the forward process.
        with torch.no_grad():
            logits = model(imgs.to(device))

        # We can still compute the loss (but not the gradient).
        loss = criterion(logits, labels.to(device))

        # Compute the accuracy for current batch.
        acc = (logits.argmax(dim=-1) == labels.to(device)).float().mean()

        # Record the loss and accuracy.
        valid_loss.append(loss.item())
        valid_accs.append(acc)
        #break

    # The average loss and accuracy for entire validation set is the average of the recorded values.
    valid_loss = sum(valid_loss) / len(valid_loss)
    valid_acc = sum(valid_accs) / len(valid_accs)

    # Print the information.
    print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}")

    # Update the log file (the "-> best" tag marks a new best validation accuracy).
    with open(f"./{_exp_name}_log.txt", "a") as f:
        if valid_acc > best_acc:
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f} -> best", file=f)
        else:
            print(f"[ Valid | {epoch + 1:03d}/{n_epochs:03d} ] loss = {valid_loss:.5f}, acc = {valid_acc:.5f}", file=f)


    # save models
    if valid_acc > best_acc:
        print(f"Best model found at epoch {epoch}, saving model")
        torch.save(model.state_dict(), f"{_exp_name}_best.ckpt") # only save best to prevent output memory exceed error
        best_acc = valid_acc
        stale = 0
    else:
        stale += 1
        if stale > patience:
            print(f"No improvment {patience} consecutive epochs, early stopping")
            break


test_set = FoodDataset(os.path.join(_dataset_dir,"test"), tfm=test_tfm)
test_loader = DataLoader(test_set, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)


model_best = Classifier().to(device)
model_best.load_state_dict(torch.load(f"{_exp_name}_best.ckpt"))
model_best.eval()
prediction = []
with torch.no_grad():
    for data,_ in test_loader:
        test_pred = model_best(data.to(device))
        test_label = np.argmax(test_pred.cpu().data.numpy(), axis=1)
        prediction += test_label.squeeze().tolist()
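# The transform section above mentions test-time augmentation (TTA): run the test images through an
# augmented transform several times and ensemble the predictions. A minimal sketch, assuming train_tfm_aug
# from the transform section; n_tta, test_set_tta and the simple probability averaging are illustrative
# choices, not part of the original code.
test_set_tta = FoodDataset(os.path.join(_dataset_dir, "test"), tfm=train_tfm_aug)
test_loader_tta = DataLoader(test_set_tta, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)

n_tta = 3                                   # number of augmented passes
with torch.no_grad():
    # probabilities from the plain (test_tfm) pass; both loaders use the same sorted file order
    avg_probs = torch.cat([torch.softmax(model_best(data.to(device)), dim=-1).cpu()
                           for data, _ in test_loader])
    # add probabilities from the augmented passes
    for _ in range(n_tta):
        avg_probs += torch.cat([torch.softmax(model_best(data.to(device)), dim=-1).cpu()
                                for data, _ in test_loader_tta])
    avg_probs /= (n_tta + 1)
prediction_tta = avg_probs.argmax(dim=-1).tolist()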

# For the homework submission: create the test csv
# def pad4(i):
#     return "0"*(4-len(str(i)))+str(i)
# df = pd.DataFrame()
# df["Id"] = [pad4(i) for i in range(1,len(test_set)+1)]
# df["Category"] = prediction
# df.to_csv("submission.csv",index = False)
