记录一个简单的神经网络模型流程
通过简单模型,看透神经网络本质
1.模型model
定义模型的__init__和 forward()
用两个全连接层(nn.Linear)加一个 ReLU 激活函数,就构成了一个最简单的神经网络模型;也可以随意添加卷积层 nn.Conv2d。它和各种主流模型在本质上没有区别,只是主流模型的结构设计更好、效果更好。
2.训练train
(1)数据预处理方式
使用transforms类进行数据的处理,包括剪裁,缩放,翻转等增强数据,normalize归一化数据
(2)数据载入
用torchvision.datasets.ImageFolder读取本地数据,torch.utils.data.DataLoader 批次batchsize载入数据
(3)定义模型、损失函数、优化器等
一般使用交叉熵损失nn.CrossEntropyLoss(),和optim.Adam优化器
(4)执行一个epoch的训练
初始梯度置0,执行模型前向计算,计算损失,损失反向传播,优化器调整权重参数,记录训练进度
(5)执行一个epoch后的 测试集 测试和模型保存
model 前向计算后,取输出中得分最大的类别作为预测结果,与标签比较以统计准确率;当准确率超过历史最佳时保存模型权重
3.预测predict
(1)和训练时同样的数据预处理方式
预处理方式不同,结果会有影响,要和训练时一致
(2)载入图像、定义模型、加载模型权重
im.convert("RGB") 是因为截图或网上的图片很多是32位深度(RGBA,带透明通道),需要转为24位的RGB三通道图像
model.load_state_dict 加载模型
(3)预测并获取结果
softmax 将模型输出的得分(logits)归一化为各类别的概率分布(各项非负且和为1)
4.完整代码
import torch
import torch.nn as nn
import torch.nn.functional as F
import matplotlib.pyplot as plt
import torchvision.transforms as transforms
import os
import torchvision
import torch.utils.data as dl
import torch.optim as optim
from PIL import Image
class MyNet(nn.Module):
    """Minimal image classifier built from two fully connected layers.

    The input batch is flattened, passed through one hidden linear layer
    followed by ReLU, and projected to ``out_channel`` raw class scores.

    Args:
        in_channel: Number of channels of the input images.
        hid_channel: Width of the hidden layer.
        out_channel: Number of output classes.
        img_size: Height and width (in pixels) of the square input images.
            Defaults to 224, the size used by the original hard-coded layer.
    """

    def __init__(self, in_channel=3, hid_channel=10, out_channel=5,
                 img_size=224) -> None:
        super().__init__()
        # Two arbitrarily chosen fully connected layers make up the model.
        self.hidden = nn.Linear(img_size * img_size * in_channel, hid_channel)
        self.relu = nn.ReLU(inplace=True)
        self.out = nn.Linear(hid_channel, out_channel)

    def forward(self, x):
        """Return raw class scores of shape ``(N, out_channel)`` for batch ``x``."""
        # Keep the batch dimension and flatten everything else for nn.Linear.
        x = torch.flatten(x, 1)
        x = self.hidden(x)
        x = self.relu(x)
        return self.out(x)
# Single location of the trained weights, shared by training (save) and
# prediction (load) so the two paths cannot drift apart.
_WEIGHTS_PATH = os.path.join("model", "myNet.pth")


def _eval_transform():
    """Return the deterministic preprocessing used for validation and prediction."""
    return transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])


def _train():
    """Train MyNet on the flower dataset and save the best-validating weights."""
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # Data preprocessing: random crop/flip augmentation for training only.
    train_transform = transforms.Compose([
        transforms.RandomResizedCrop(224),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5)),
    ])

    # The dataset is looked up in the parent of the current working directory.
    data_root = os.path.abspath(os.path.join(os.getcwd(), ".."))
    image_path = os.path.join(data_root, "dataSet", "flower_data")

    # Training set
    train_dataset = torchvision.datasets.ImageFolder(
        root=os.path.join(image_path, "train"), transform=train_transform)
    train_num = len(train_dataset)
    # Validation set
    val_dataset = torchvision.datasets.ImageFolder(
        root=os.path.join(image_path, "val"), transform=_eval_transform())
    val_num = len(val_dataset)

    batch_size = 16
    train_loader = dl.DataLoader(
        train_dataset, batch_size=batch_size, shuffle=True, num_workers=0)
    val_loader = dl.DataLoader(
        val_dataset, batch_size=batch_size, shuffle=False, num_workers=0)
    print("using {} images for training, {} images for validation.".format(
        train_num, val_num))

    model = MyNet()
    model.to(device)
    lossfun = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.0001)

    best_acc = 0.0
    # torch.save does not create missing directories.
    os.makedirs(os.path.dirname(_WEIGHTS_PATH), exist_ok=True)

    # Number of passes over the training set.
    epochs = 30
    num_batches = len(train_loader)
    for epoch in range(epochs):
        model.train()  # training mode
        running_loss = 0.0
        for step, (inputs, labels) in enumerate(train_loader):
            optimizer.zero_grad()
            logits = model(inputs.to(device))
            loss = lossfun(logits, labels.to(device))
            loss.backward()
            optimizer.step()

            # Progress statistics for the current batch.
            batch_loss = loss.item()
            running_loss += batch_loss
            rate = (step + 1) / num_batches
            a = "*" * int(rate * 50)
            b = "*" * int((1 - rate) * 50)
            print(
                "\r train loss :{:^3.0f}%[{}->{}]{:.3f}".format(
                    int(rate * 100), a, b, batch_loss),
                end="")
        # Terminate the in-place progress line before the epoch summary.
        print()

        # Validation
        model.eval()
        correct = 0
        with torch.no_grad():  # no gradients needed for evaluation
            for test_images, test_labels in val_loader:
                outputs = model(test_images.to(device))
                predict_y = torch.max(outputs, dim=1)[1]
                correct += (predict_y == test_labels.to(device)).sum().item()
        accurate_test = correct / val_num
        if accurate_test > best_acc:
            best_acc = accurate_test
            torch.save(model.state_dict(), _WEIGHTS_PATH)
        # Average over the number of TRAINING batches; the validation loop
        # must not influence the denominator.
        print('[epoch %d] train loss:%.3f test_acc:%.3f ' %
              (epoch + 1, running_loss / num_batches, accurate_test))
    print('Finish training')


def _predict(image_file='4.jpg'):
    """Classify one image with the saved weights and print class index and probability."""
    # Close the file handle once the pixel data has been converted to RGB.
    with Image.open(image_file) as raw:
        im = raw.convert("RGB")
    im = _eval_transform()(im)  # [C, H, W]
    im = torch.unsqueeze(im, dim=0)  # [N, C, H, W]

    model = MyNet()
    # map_location lets weights saved from a CUDA model load on a CPU-only host.
    # NOTE: torch.load unpickles the file; only load weight files you trust.
    model.load_state_dict(torch.load(_WEIGHTS_PATH, map_location="cpu"))
    model.eval()
    with torch.no_grad():
        out = model(im)
    outputs = torch.squeeze(out, dim=0)
    predict = torch.softmax(outputs, dim=0)
    predict_cla = int(torch.argmax(predict))
    print(str(predict_cla), predict[predict_cla].item())


def main(runMode='predict'):
    """Run the demo in the requested mode.

    Args:
        runMode: ``'train'`` to train and save the model, ``'predict'``
            (default) to classify ``4.jpg`` with the saved weights.

    Raises:
        ValueError: If ``runMode`` is neither ``'train'`` nor ``'predict'``.
    """
    if runMode == 'train':
        _train()
    elif runMode == 'predict':
        _predict()
    else:
        raise ValueError(
            "runMode must be 'train' or 'predict', got {!r}".format(runMode))
if __name__ == '__main__':
    # Run the demo only when executed as a script, not when imported.
    main()