Here we take a complete, given dataset as an example and split it in the program into a training set, a validation set, and a test set.
First prepare the data: download it, unzip it, and note the file path. The code walkthrough follows.
Overall flow: data preparation → data reading → data loading → data preview → building the model → inspecting the model → setting the optimizer and loss function → training → evaluation → predicting on a single image → saving the model.
Link: https://pan.baidu.com/s/1YXX4v-PavtA6Nu-ALw2Onw
Extraction code: t1pi
# Import the packages we may need
from __future__ import print_function, division
import torch
import torch.nn as nn
import torch.optim as optim
from torch.optim import lr_scheduler
import numpy as np
import torchvision
from torchvision import datasets, models, transforms
import torch.nn.functional as F
import matplotlib.pyplot as plt
import time
import os
import copy
# Parameter definitions: batch size, input size, learning rate, number of data-loading workers, etc.; adjust to your needs (only batch_size and device are set here)
batch_size = 8
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Data reading: randomly split the raw data into a training set, a validation set and a test set;
# the first two are used to train and tune the model, the last one for the final evaluation.
# The same transforms are applied to every split.
# Define the transforms (applied to all of the data).
# Example mean/std values (often used as fixed values with pretrained models):
# normMean = [0.4948052, 0.48568845, 0.44682974]
# normStd = [0.24580306, 0.24236229, 0.2603115]
# normTransform = transforms.Normalize(normMean, normStd)  # per-channel mean and std
# normMean = mean
# normStd = std
# normTransform = transforms.Normalize(normMean, normStd)  # only uncomment once normMean/normStd are defined (see the mean/std computation below)
data_transforms = transforms.Compose([
transforms.Resize((128,128)),
transforms.CenterCrop(128),
transforms.ToTensor()
#normTransform  # normalization must come after ToTensor(); add a comma after ToTensor() when uncommenting this
])
all_dataset = datasets.ImageFolder('D:/计算机视觉/num_data', transform=data_transforms)
class_names = tuple(all_dataset.classes)  # class labels
all_dataset_lenth = len(all_dataset)  # total number of samples
class_num = len(class_names)  # number of classes
train, test, valid = torch.utils.data.random_split(dataset=all_dataset,
lengths=[int(all_dataset_lenth * 0.8), int(all_dataset_lenth * 0.15),
all_dataset_lenth - int(all_dataset_lenth * 0.8) - int(
all_dataset_lenth * 0.15)])
#len(valid)+len(train)+len(test) == len(all_dataset)  # sanity-check that the three splits cover the whole dataset
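If you want the split to be reproducible across runs, recent PyTorch versions let random_split take a seeded generator; a minimal sketch (the seed value 42 is arbitrary):
# Reproducible version of the same split (optional)
train, test, valid = torch.utils.data.random_split(
    dataset=all_dataset,
    lengths=[int(all_dataset_lenth * 0.8), int(all_dataset_lenth * 0.15),
             all_dataset_lenth - int(all_dataset_lenth * 0.8) - int(all_dataset_lenth * 0.15)],
    generator=torch.Generator().manual_seed(42))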
# Computing the mean and std: first run the step above to read the data as tensors, compute the
# mean and std, then fill them back into the Normalize transform. If you use a pretrained
# (transfer-learning) model, fairly fixed values are commonly used instead, e.g.:
# normMean = [0.4948052, 0.48568845, 0.44682974]
# normStd = [0.24580306, 0.24236229, 0.2603115]
# def get_mean_std(dataset, ratio=0.01):
#     """Get mean and std from a random sample of the dataset."""
#     dataloader = torch.utils.data.DataLoader(dataset, batch_size=int(len(dataset)*ratio),
#                                              shuffle=True, num_workers=10)
#     train = next(iter(dataloader))[0]  # one batch of data
#     mean = np.mean(train.numpy(), axis=(0, 2, 3))
#     std = np.std(train.numpy(), axis=(0, 2, 3))
#     return mean, std
# mean, std = get_mean_std(all_dataset)  # mean and std of the whole dataset, used for normalization
# print(mean, std)
# normMean = mean
# normStd = std
# normTransform = transforms.Normalize(normMean, normStd)  # per-channel mean and std
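Once the mean and std are known, the pipeline with normalization enabled would look like the sketch below (the numbers are the example values quoted above, not values measured on this dataset):
normTransform = transforms.Normalize([0.4948052, 0.48568845, 0.44682974],
                                     [0.24580306, 0.24236229, 0.2603115])
data_transforms_norm = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.CenterCrop(128),
    transforms.ToTensor(),   # Normalize must come after ToTensor
    normTransform,
])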
# Data loading
trainloader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True)
testloader = torch.utils.data.DataLoader(test, batch_size=batch_size, shuffle=True)
validloader = torch.utils.data.DataLoader(valid, batch_size=batch_size, shuffle=True)
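The parameter comment at the top mentions a worker count, but these loaders use the default single-process loading. As an optional sketch, the training loader could be created with (illustrative) num_workers and pin_memory settings, which can speed up loading when spare CPU cores and a GPU are available:
trainloader = torch.utils.data.DataLoader(train, batch_size=batch_size, shuffle=True,
                                          num_workers=2, pin_memory=torch.cuda.is_available())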
# Data preview
def imshow(inp, title='Data Show'):  # if you normalized the data, use the same mean and std here
    """Imshow for Tensor."""
    # Convert back from tensor to numpy array: channels move from (C, H, W) to (H, W, C)
    inp = inp.numpy().transpose((1, 2, 0))
    mean = np.array([0.485, 0.456, 0.406])  # change these values if you normalized with a different mean/std
    std = np.array([0.229, 0.224, 0.225])
    # Undo the normalization to get back the original image
    inp = std * inp + mean
    inp = np.clip(inp, 0, 1)
    plt.imshow(inp)
    if title is not None:
        plt.title(title)
    plt.pause(0.001)  # pause a bit so that plots are updated
images, labels = next(iter(trainloader))  # take one batch; its shape is torch.Size([8, 3, 128, 128]): 8 = batch size, 3 = RGB channels, 128 = height, 128 = width
img = torchvision.utils.make_grid(images, nrow=4, padding=10)  # arrange the batch into a grid with nrow images per row and padding pixels between images
imshow(img)
print('Corresponding labels:')
print(' '.join('%5s' % class_names[labels[j]] for j in range(batch_size)))
# Build the network
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # input: 3 x 128 x 128
        self.conv1 = nn.Conv2d(3, 6, 5)   # 5x5 conv, no padding: 128 -> 124
        self.pool = nn.MaxPool2d(2, 2)    # 2x2 max pool: halves the spatial size
        self.conv2 = nn.Conv2d(6, 16, 5)  # after the first pool 124 -> 62, then 62 -> 58 here, and 58 -> 29 after the second pool
        # So the feature map entering the fully connected layers is 16 * 29 * 29.
        # If you change the input size, recompute this number from the new input, then update both
        # the first Linear layer and the x.view() call below; the -1 in view() matches the batch dimension.
        self.fc1 = nn.Linear(16 * 29 * 29, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, class_num)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))   # 3 x 128 x 128 -> 6 x 62 x 62
        x = self.pool(F.relu(self.conv2(x)))   # 6 x 62 x 62 -> 16 x 29 x 29
        x = x.view(-1, 16 * 29 * 29)           # flatten the conv output to (batch_size, 16 * 29 * 29); this is the dividing line between conv and fc layers
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x
Since the number of input features of the first fully connected layer changes with the input image size, it has to be computed. Here we use a trial approach: comment out the fully connected layers, run a batch through the network, and read the feature-map size off the output shape.
# Build the network (fully connected layers commented out in forward() to probe the feature-map size)
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        # input: 3 x 128 x 128
        self.conv1 = nn.Conv2d(3, 6, 5)   # 128 -> 124
        self.pool = nn.MaxPool2d(2, 2)    # halves the spatial size
        self.conv2 = nn.Conv2d(6, 16, 5)  # 62 -> 58, then 58 -> 29 after the pool
        # The fully connected layers stay defined here but are unused below;
        # the in_features of fc1 (16 * 29 * 29) is exactly the number we want to confirm.
        self.fc1 = nn.Linear(16 * 29 * 29, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, class_num)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))   # 6 x 62 x 62
        x = self.pool(F.relu(self.conv2(x)))   # 16 x 29 x 29
        # x = x.view(-1, 16 * 29 * 29)  # flatten to (batch_size, 16 * 29 * 29)
        # x = F.relu(self.fc1(x))
        # x = F.relu(self.fc2(x))
        # x = self.fc3(x)
        return x

net = Net()
y = net(images)
y.shape
The output is torch.Size([8, 16, 29, 29]): the first 8 is the batch size, and 16 × 29 × 29 is the flatten size we were after. Once this is confirmed, uncomment the fully connected layers and the view() call (i.e. use the full definition above) before continuing.
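Instead of probing, the flatten size can also be computed directly from the 128×128 input; a quick check of the arithmetic (a sketch, not from the original post):
# Spatial size through the network for a 128x128 input
# (5x5 convs with no padding shrink the size by 4; 2x2 max pools halve it)
size = 128
size = (size - 4) // 2   # conv1 + pool: 128 -> 124 -> 62
size = (size - 4) // 2   # conv2 + pool:  62 ->  58 -> 29
print(16 * size * size)  # 16 * 29 * 29 = 13456, the in_features of fc1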
# Inspect the model
net = Net()
print(net)
# Set the optimizer and the loss function
lr = 0.001
optimizer = torch.optim.Adam(net.parameters(), lr=lr)
cost = nn.CrossEntropyLoss()
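lr_scheduler is imported at the top but never used. If you want the learning rate to decay during training, a minimal sketch (the StepLR step_size and gamma are illustrative, not values from the original post):
scheduler = lr_scheduler.StepLR(optimizer, step_size=2, gamma=0.1)
# call scheduler.step() once per epoch, after the optimizer updates of the training phase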
# Train the model
# Bundle the train/val loaders into one dict for the training loop, and set the remaining parameters
dataloaders = {
'train':trainloader,
'val':validloader
}
dataset_sizes = {'train':len(train),'val':len(valid)}
class_names = all_dataset.classes
num_epochs = 5
# for inputs, labels in dataloaders['train']:
# print(inputs.shape)
# Start training
print("Training on ", device)
net = net.to(device)
time_start = time.time()
for epoch in range(num_epochs):
print('Epoch {}/{}'.format(epoch, num_epochs - 1))
print('-' * 10)
for phase in ['train', 'val']:
# 注意训练和验证阶段,需要分别对 model 的设置
if phase == 'train':
print('Training......')
net.train(True) # 训练模式
else:
print('Valid......')
net.train(False) # 评估模式会关闭Dropout
running_loss = 0.0
running_corrects = 0
start = time.time()
for inputs, labels in dataloaders[phase]:
inputs = inputs.to(device)
labels = labels.to(device)
# 清空参数的梯度
optimizer.zero_grad()
# 只有训练阶段才追踪历史
with torch.set_grad_enabled(phase == 'train'):
outputs = net(inputs)
_, preds = torch.max(outputs, 1)
loss = cost(outputs, labels)
if phase == 'train':
loss.backward()
optimizer.step()
# 记录 loss 和 准确率
running_loss += loss.item() * inputs.size(0)
running_corrects += torch.sum(preds == labels.data)
epoch_loss = running_loss / dataset_sizes[phase]
epoch_acc = running_corrects.double() / dataset_sizes[phase]
print('{} Loss: {:.4f}; Acc: {:.4f}; Time: {:.4f}seconds'.format(phase, epoch_loss, epoch_acc, time.time() - start))
print("Training Finshed!!! Total Time Cost:{:.4f}second!!!".format(time.time() - time_start))
# Final test with the trained model: overall accuracy
def evaluate_accuracy(data_iter, net, device=None):
    if device is None and isinstance(net, torch.nn.Module):
        # If no device is given, use the device the network lives on
        device = list(net.parameters())[0].device
    acc_sum, n = 0.0, 0
    with torch.no_grad():
        for X, y in data_iter:
            if isinstance(net, torch.nn.Module):
                net.eval()  # evaluation mode; this disables dropout
                acc_sum += (net(X.to(device)).argmax(dim=1) == y.to(device)).float().sum().cpu().item()
                net.train()  # switch back to training mode
            else:
                if 'is_training' in net.__code__.co_varnames:  # if the function has an is_training argument
                    # call it with is_training=False
                    acc_sum += (net(X, is_training=False).argmax(dim=1) == y).float().sum().item()
                else:
                    acc_sum += (net(X).argmax(dim=1) == y).float().sum().item()
            n += y.shape[0]
    return 100 * acc_sum / n
Test_acc = evaluate_accuracy(testloader, net, device=None)
print(("Accuracy of the network on the {:.1f} test images is : {:.2f}%".format(len(test), Test_acc)))
# Final test with the trained model: per-class accuracy
class_correct = list(0. for i in range(class_num))
class_total = list(0. for i in range(class_num))
net.eval()
with torch.no_grad():
    for data in testloader:
        images, labels = data
        images, labels = images.to(device), labels.to(device)
        outputs = net(images)
        _, predicted = torch.max(outputs, 1)
        c = (predicted == labels)
        for i in range(labels.size(0)):  # iterate over the actual batch size so the last, smaller batch is handled correctly
            label = labels[i]
            class_correct[label] += c[i].item()
            class_total[label] += 1
for i in range(class_num):
    print("Accuracy of {} is : {:.2f}% ; Total num is : {:.0f}".format(class_names[i], 100 * class_correct[i] / class_total[i], class_total[i]))
# Predict on a single image: pick any image to check whether the prediction is right
import PIL.Image
transform = transforms.Compose([
    transforms.Resize((128, 128)),
    transforms.ToTensor()
])
filename = 'D:/计算机视觉/datanum/num_0/1509806985_268_7.bmp'
img = PIL.Image.open(filename).convert('RGB')  # read the image as a 3-channel RGB image
img = transform(img)
img = img.unsqueeze(0).to(device)  # add the batch dimension: (3, 128, 128) -> (1, 3, 128, 128)
net.eval()
with torch.no_grad():
    outputs = net(img)
outputs
_, predicted = torch.max(outputs.data, 1)
print('Predicted class: {}'.format(class_names[predicted.item()]))
# Save and load the model
torch.save(net, 'net10.pkl')
loadnet = torch.load('net10.pkl')
loadnet.eval()
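torch.save(net, ...) pickles the entire module, which ties the saved file to this exact class definition. A more portable alternative is to save only the parameters; a sketch (the file name is illustrative):
torch.save(net.state_dict(), 'net10_state.pth')
loadnet = Net()  # the full Net definition (with the fully connected layers) must be available
loadnet.load_state_dict(torch.load('net10_state.pth'))
loadnet.eval()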