《pytorch图像分类》p2AlexNet网络基础及代码
一、零碎知识点
1.过拟合
模型假设过于复杂,参数过多,训练数据过少,噪声过多,导致拟合的函数完美的预测训练集,但对新数据的测试集预测结果差。 过度的拟合了训练数据,而没有考虑到泛化能力。
举个栗子:当我们在学习一门新的学科时会做一些例题,我们把这些例题完完整整背下来,但是一旦给出新的题目,还是不会做,这就是学习没有泛化能力,只记住了例题的细节而忽视了更普遍的规律。
2.使用dropout后的正向传播
dropout会在每一层当中随机失活一部分神经元,从而减少了神经元之间的共适应性,防止过拟合。
nn.Dropout(p= )
p代表的是随机失活的比例,默认p=0.5
3.正则化regularization
是一种通过添加额外的约束或惩罚项来控制模型的复杂度的技术,其目的也是防止过拟合。
假设我们的模型是一个二次多项式,我们的目标是最小化损失函数(均方误差MSE),让预测的曲线与真实值尽可能接近。
L2正则化将惩罚项加到损失函数中,使用权重的平方和乘以一个正则化系数λ。
带正则化的损失函数为:
L
o
s
s
=
M
S
E
+
λ
∗
∣
∣
w
∣
∣
2
Loss = MSE + λ * ||w||^2
Loss=MSE+λ∗∣∣w∣∣2
很抽象,以后再深入学习。
4.代码中所用的知识点
- 设备设置
如果有可以使用的gpu设备,默认使用第一个,没有的话即使用cpu。
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
- RandomResizedCrop随机裁剪
将裁剪后的图像调整为指定的大小(224x224)
transforms.RandomResizedCrop(224)
-
root=“. ./. .”
返回上上层目录 -
CrossEntropyLoss交叉熵损失函数
它是PyTorch中的一个损失函数,常用于多分类问题的训练中。它结合了Softmax激活函数和负对数似然损失(NLLLoss)
详情请见我之前写的博客:多分类问题 -
net.train( )和net.eval( )
当调用net.train()
时,模型将被设置为训练模式。在训练模式下,模型会启用一些特定的操作,如Batch Normalization归一化处理和Dropout随机失活一些神经元,防止过拟合。而调用net.eval()
时,模型将被设置为评估(evaluate)模式。
二、总体架构分析
1.ReLU激活函数
ReLU(Rectified Linear Unit)线性整流函数,又称修正线性单元。在PyTorch中,可以使用torch.nn.ReLU
类来表示。
其公式为:
f
(
x
)
=
M
a
x
(
0
,
x
)
f(x)=Max(0,x)
f(x)=Max(0,x)
它将小于零的输入映射为0,大于等于零的输入保持不变,求导简单方便。
我总结了这些天学习的几个激活函数:激活函数总结
2.手算
3.模型代码
nn.sequential将一系列的层结构打包组合成一个新的结构
classifer包括了三个全连接层
1.Conv1 第一个卷积层
代码表示为: nn.Conv2d(in_channels,out_channels,kernel_size,stride,padding)
彩色图像有rgb三个通道,所以输入通道数为3,输出通道数=卷积核的个数kernel_num
为了方便训练,卷积核个数只取一半,padding直接取2,训练效果与原本的影响不大
nn.Conv2d(3,48,kernel_size=11,stride=4,padding=2)
2.Maxpool1 第一个池化层
代码表示为:nn.MaxPool2d(kernel_size,stride)
nn.MaxPool2d(kernel_size=3,stride=2)
3.后续代码
nn.Conv2d(48,128,kernel_size=5,padding=2)
nn.MaxPool2d(kernel_size=3,stride=2)
nn.Conv2d(128,192,kernel_size=3,padding=1)
nn.Conv2d(192,192,kernel_size=3,padding=1)
nn.Conv2d(192,128,kernel_size=3,padding=1)
nn.MaxPool2d(kernel_size=3,stride=2)
三、训练花分类课程代码
1.model.py
import torch.nn as nn
import torch
class AlexNet(nn.Module):
def __init__(self, num_classes=1000, init_weights=False):
super(AlexNet, self).__init__()
self.features = nn.Sequential(
nn.Conv2d(3, 48, kernel_size=11, stride=4, padding=2), # input[3, 224, 224] output[48, 55, 55]
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2), # output[48, 27, 27]
nn.Conv2d(48, 128, kernel_size=5, padding=2), # output[128, 27, 27]
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2), # output[128, 13, 13]
nn.Conv2d(128, 192, kernel_size=3, padding=1), # output[192, 13, 13]
nn.ReLU(inplace=True),
nn.Conv2d(192, 192, kernel_size=3, padding=1), # output[192, 13, 13]
nn.ReLU(inplace=True),
nn.Conv2d(192, 128, kernel_size=3, padding=1), # output[128, 13, 13]
nn.ReLU(inplace=True),
nn.MaxPool2d(kernel_size=3, stride=2), # output[128, 6, 6]
)
self.classifier = nn.Sequential(
nn.Dropout(p=0.5),
nn.Linear(128 * 6 * 6, 2048),
nn.ReLU(inplace=True),
nn.Dropout(p=0.5),
nn.Linear(2048, 2048),
nn.ReLU(inplace=True),
nn.Linear(2048, num_classes),
)
if init_weights:
self._initialize_weights()
def forward(self, x):
x = self.features(x)
x = torch.flatten(x, start_dim=1)
x = self.classifier(x)
return x
def _initialize_weights(self):
for m in self.modules():
if isinstance(m, nn.Conv2d):
nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
if m.bias is not None:
nn.init.constant_(m.bias, 0)
elif isinstance(m, nn.Linear):
nn.init.normal_(m.weight, 0, 0.01)
nn.init.constant_(m.bias, 0)
2.train.py
import os
import sys
import json
import torch
import torch.nn as nn
from torchvision import transforms, datasets, utils
import matplotlib.pyplot as plt
import numpy as np
import torch.optim as optim
from tqdm import tqdm
from model import AlexNet
def main():
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
print("using {} device.".format(device))
data_transform = {
"train": transforms.Compose([transforms.RandomResizedCrop(224),
transforms.RandomHorizontalFlip(),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))]),
"val": transforms.Compose([transforms.Resize((224, 224)), # cannot 224, must (224, 224)
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])}
data_root = os.path.abspath(os.path.join(os.getcwd(), "../..")) # get data root path
image_path = os.path.join(data_root, "data_set", "flower_data") # flower data set path
assert os.path.exists(image_path), "{} path does not exist.".format(image_path)
train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"),
transform=data_transform["train"])
train_num = len(train_dataset)
# {'daisy':0, 'dandelion':1, 'roses':2, 'sunflower':3, 'tulips':4}
flower_list = train_dataset.class_to_idx
cla_dict = dict((val, key) for key, val in flower_list.items())
# write dict into json file
json_str = json.dumps(cla_dict, indent=4)
with open('class_indices.json', 'w') as json_file:
json_file.write(json_str)
batch_size = 32
nw = min([os.cpu_count(), batch_size if batch_size > 1 else 0, 8]) # number of workers
print('Using {} dataloader workers every process'.format(nw))
train_loader = torch.utils.data.DataLoader(train_dataset,
batch_size=batch_size, shuffle=True,
num_workers=nw)
validate_dataset = datasets.ImageFolder(root=os.path.join(image_path, "val"),
transform=data_transform["val"])
val_num = len(validate_dataset)
validate_loader = torch.utils.data.DataLoader(validate_dataset,
batch_size=4, shuffle=True,
num_workers=nw)
print("using {} images for training, {} images for validation.".format(train_num,
val_num))
# test_data_iter = iter(validate_loader)
# test_image, test_label = test_data_iter.next()
#
# def imshow(img):
# img = img / 2 + 0.5 # unnormalize
# npimg = img.numpy()
# plt.imshow(np.transpose(npimg, (1, 2, 0)))
# plt.show()
#
# print(' '.join('%5s' % cla_dict[test_label[j].item()] for j in range(4)))
# imshow(utils.make_grid(test_image))
net = AlexNet(num_classes=5, init_weights=True)
net.to(device)
loss_function = nn.CrossEntropyLoss()
# pata = list(net.parameters())
optimizer = optim.Adam(net.parameters(), lr=0.0002)
epochs = 10
save_path = './AlexNet.pth'
best_acc = 0.0
train_steps = len(train_loader)
for epoch in range(epochs):
# train
net.train()
running_loss = 0.0
train_bar = tqdm(train_loader, file=sys.stdout)
for step, data in enumerate(train_bar):
images, labels = data
optimizer.zero_grad()
outputs = net(images.to(device))
loss = loss_function(outputs, labels.to(device))
loss.backward()
optimizer.step()
# print statistics
running_loss += loss.item()
train_bar.desc = "train epoch[{}/{}] loss:{:.3f}".format(epoch + 1,
epochs,
loss)
# validate
net.eval()
acc = 0.0 # accumulate accurate number / epoch
with torch.no_grad():
val_bar = tqdm(validate_loader, file=sys.stdout)
for val_data in val_bar:
val_images, val_labels = val_data
outputs = net(val_images.to(device))
predict_y = torch.max(outputs, dim=1)[1]
acc += torch.eq(predict_y, val_labels.to(device)).sum().item()
val_accurate = acc / val_num
print('[epoch %d] train_loss: %.3f val_accuracy: %.3f' %
(epoch + 1, running_loss / train_steps, val_accurate))
if val_accurate > best_acc:
best_acc = val_accurate
torch.save(net.state_dict(), save_path)
print('Finished Training')
if __name__ == '__main__':
main()
3.predict.py
import os
import json
import torch
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
from model import AlexNet
def main():
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
data_transform = transforms.Compose(
[transforms.Resize((224, 224)),
transforms.ToTensor(),
transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])
# load image
img_path = "1..jpg"
assert os.path.exists(img_path), "file: '{}' dose not exist.".format(img_path)
img = Image.open(img_path)
plt.imshow(img)
# [N, C, H, W]
img = data_transform(img)
# expand batch dimension
img = torch.unsqueeze(img, dim=0)
# read class_indict
json_path = './class_indices.json'
assert os.path.exists(json_path), "file: '{}' dose not exist.".format(json_path)
with open(json_path, "r") as f:
class_indict = json.load(f)
# create model
model = AlexNet(num_classes=5).to(device)
# load model weights
weights_path = "./AlexNet.pth"
assert os.path.exists(weights_path), "file: '{}' dose not exist.".format(weights_path)
model.load_state_dict(torch.load(weights_path))
model.eval()
with torch.no_grad():
# predict class
output = torch.squeeze(model(img.to(device))).cpu()
predict = torch.softmax(output, dim=0)
predict_cla = torch.argmax(predict).numpy()
print_res = "class: {} prob: {:.3}".format(class_indict[str(predict_cla)],
predict[predict_cla].numpy())
plt.title(print_res)
for i in range(len(predict)):
print("class: {:10} prob: {:.3}".format(class_indict[str(i)],
predict[i].numpy()))
plt.show()
if __name__ == '__main__':
main()
1.jpg是郁金香
2.jpg是向日葵