PyTorch学习笔记6

PyTorch学习笔记6

整理笔记视频来源
CNN-Image-Classification

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
# torchvision是独立于pytorch的关于图像操作的一些方便工具库。
# torchvision的详细介绍在:https://pypi.org/project/torchvision/0.1.8/
# torchvision主要包括以下几个包:
# vision.datasets : 几个常用视觉数据集,可以下载和加载
# vision.models : 流行的模型,例如 AlexNet, VGG, and ResNet 以及预训练好的参数。
# vision.transforms : 常用的图像操作,例如:随机切割,旋转等。
# vision.utils : 用于把形似 (3 x H x W) 的张量保存到硬盘中,给一个mini-batch的图像可以产生一个图像格网。

# Report the installed PyTorch version.
# The recorded notebook run printed: PyTorch Version:  1.0.1.post2
print("PyTorch Version: ", torch.__version__)

1、加载数据

torch.manual_seed(53113)  # seed the CPU random number generator

# The CUDA-specific settings below can be ignored on a machine without a GPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
batch_size = test_batch_size = 32
# Extra DataLoader options, only supplied when CUDA is available.
kwargs = {'num_workers': 1, 'pin_memory': True} if use_cuda else {}

# Preprocessing applied to every image of both splits.
# - ToTensor turns an (H, W, C) image / ndarray into a (C, H, W) tensor and
#   rescales the values to [0, 1] (division by 255).
# - Normalize then applies (x - mean) / std with the given per-channel mean
#   and standard deviation. The result is standardised, NOT clamped to (-1, 1).
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

# DataLoader is an iterator over the dataset that yields one mini-batch at a
# time until every sample has been produced.
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./mnist_data',
                   train=True,     # build the dataset from the training split
                   download=True,  # fetch the files automatically if missing
                   transform=mnist_transform),
    batch_size=batch_size,
    shuffle=True,  # reshuffle the samples
    **kwargs)

test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('./mnist_data',
                   train=False,    # build the dataset from the test split
                   transform=mnist_transform),
    batch_size=test_batch_size,
    shuffle=True,
    **kwargs)

# Notebook-style inspection of one sample's shape (must sit at top level, not
# indented). The recorded run showed torch.Size([1, 28, 28]).
train_loader.dataset[0][0].shape

2、定义CNN模型

class Net(nn.Module):
    """Small CNN: two 5x5 convolutions with max-pooling, then two linear layers.

    The first linear layer expects ``4*4*50`` features, which corresponds to
    a 1x28x28 input: 28 -> conv(5) -> 24 -> pool -> 12 -> conv(5) -> 8 ->
    pool -> 4, with 50 channels after the second convolution.
    """

    def __init__(self):
        super().__init__()
        # nn.Conv2d(in_channels, out_channels, kernel_size, stride)
        # in_channels is 1 for grayscale digits (3 for colour images);
        # out_channels equals the number of convolution kernels.
        self.conv1 = nn.Conv2d(1, 20, 5, 1)

        # The previous layer's out_channels (20) is this layer's in_channels.
        self.conv2 = nn.Conv2d(20, 50, 5, 1)

        # nn.Linear(in_features, out_features); 4*4*50 depends on the input
        # image size (see the class docstring).
        self.fc1 = nn.Linear(4*4*50, 500)

        # Ten outputs, one per class.
        self.fc2 = nn.Linear(500, 10)

    def forward(self, x):
        """Return per-class log-probabilities of shape (N, 10).

        Args:
            x: input batch; the shape comments below assume (N, 1, 28, 28).

        Raises:
            RuntimeError: if the flattened feature size does not match what
                ``fc1`` expects (i.e. the spatial size of ``x`` is wrong).
        """
        x = F.relu(self.conv1(x))  # (N, 20, 24, 24)
        x = F.max_pool2d(x, 2, 2)  # (N, 20, 12, 12)
        x = F.relu(self.conv2(x))  # (N, 50, 8, 8)
        x = F.max_pool2d(x, 2, 2)  # (N, 50, 4, 4)
        # Flatten per sample while keeping the batch dimension fixed. Using
        # view(-1, 800) instead could silently change the batch size when the
        # input has an unexpected spatial size.
        x = x.view(x.size(0), -1)  # (N, 4*4*50)
        x = F.relu(self.fc1(x))    # (N, 500)
        x = self.fc2(x)            # (N, 10)
        return F.log_softmax(x, dim=1)

3、初始化模型和定义优化函数

# Hyper-parameters for SGD with momentum.
lr, momentum = 0.01, 0.5

# Instantiate the network on the selected device and attach the optimizer.
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

NLL loss的定义

$$\ell(x, y) = L = \{l_1, \dots, l_N\}^\top, \quad l_n = -w_{y_n}\, x_{n, y_n}, \quad w_c = \text{weight}[c] \cdot \mathbb{1}\{c \neq \text{ignore\_index}\}$$

4、定义训练和测试模型

def train(model, device, train_loader, optimizer, epoch, log_interval=100):
    """Run one training epoch and periodically print progress.

    Args:
        model: network whose output is fed to ``F.nll_loss`` (so it is
            expected to produce log-probabilities of shape (N, classes)).
        device: device the batches are moved to before the forward pass.
        train_loader: iterable of ``(data, target)`` batches that supports
            ``len()`` and exposes a ``.dataset`` with a length.
        optimizer: optimizer updating ``model``'s parameters.
        epoch: epoch number, used only in the progress message.
        log_interval: print a progress line every ``log_interval`` batches.
    """
    model.train()  # switch to training mode
    for batch_idx, (data, target) in enumerate(train_loader):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad()  # clear gradients left over from the last step
        output = model(data)
        # nll_loss averages over the batch by default. Together with a
        # log_softmax output this is the single-label cross-entropy loss.
        loss = F.nll_loss(output, target)

        loss.backward()
        optimizer.step()
        if batch_idx % log_interval == 0:
            # "{:.0f}" prints the percentage without decimals. The previous
            # "{:0f}" was a width-0 float format and printed six decimals.
            print("Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}".format(
                epoch,
                batch_idx * len(data),          # samples processed so far
                len(train_loader.dataset),      # total number of samples
                100. * batch_idx / len(train_loader),  # share of batches done
                loss.item()
            ))
def test(model, device, test_loader):
    """Evaluate ``model`` on ``test_loader`` and print average loss and accuracy.

    The loss is summed per batch (``reduction='sum'``) and divided by the
    dataset size at the end, so the printed value is a per-sample average.
    """
    model.eval()  # switch to evaluation mode
    total_loss = 0
    num_correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data = data.to(device)
            target = target.to(device)
            log_probs = model(data)
            # Accumulate the summed (not averaged) loss of this batch.
            total_loss += F.nll_loss(log_probs, target, reduction='sum').item()

            # Index of the largest log-probability, kept as a column (N, 1).
            predicted = log_probs.argmax(dim=1, keepdim=True)
            # target is reshaped to match predicted before the comparison.
            hits = torch.eq(predicted, target.view_as(predicted))
            num_correct += hits.sum().item()

    num_samples = len(test_loader.dataset)
    total_loss /= num_samples

    print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format(
        total_loss, num_correct, num_samples,
        100. * num_correct / num_samples))

5、查看运行结果

# Alternate one training pass and one evaluation pass per epoch.
epochs = 2
for epoch in range(1, epochs + 1):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

# Persist only the parameters (the state_dict is a dictionary), not the
# whole module object.
save_model = True
if save_model:
    torch.save(model.state_dict(), "mnist_cnn.pt")
# Same pipeline as the MNIST run, applied to FashionMNIST.
torch.manual_seed(53113)

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
batch_size = test_batch_size = 32
kwargs = {}
if use_cuda:
    kwargs = {'num_workers': 1, 'pin_memory': True}

# NOTE(review): these mean/std values are identical to the ones used for
# MNIST; confirm they are intended for FashionMNIST as well.
fashion_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,)),
])

train_loader = torch.utils.data.DataLoader(
    datasets.FashionMNIST('./fashion_mnist_data', train=True, download=True,
                          transform=fashion_transform),
    batch_size=batch_size, shuffle=True, **kwargs)
test_loader = torch.utils.data.DataLoader(
    datasets.FashionMNIST('./fashion_mnist_data', train=False,
                          transform=fashion_transform),
    batch_size=test_batch_size, shuffle=True, **kwargs)

# Fresh model and optimizer for this dataset.
lr, momentum = 0.01, 0.5
model = Net().to(device)
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

epochs = 2
for epoch in range(1, epochs + 1):
    train(model, device, train_loader, optimizer, epoch)
    test(model, device, test_loader)

save_model = True
if save_model:
    torch.save(model.state_dict(), "fashion_mnist_cnn.pt")

CNN模型的迁移学习

  1. 很多时候当我们需要训练一个新的图像分类任务,我们不会完全从一个随机的模型开始训练,而是利用_预训练_的模型来加速训练的过程。我们经常使用在ImageNet上的预训练模型。

  2. 这是一种transfer learning的方法。我们常用以下两种方法做迁移学习。
    2.1 fine tuning: 从一个预训练模型开始,我们改变一些模型的架构,然后继续训练整个模型的参数。
    2.2 feature extraction: 我们不再改变预训练模型的参数,而是只更新我们改变过的部分模型参数。我们之所以叫它feature extraction是因为我们把预训练的CNN模型当做一个特征提取模型,利用提取出来的特征来完成我们的训练任务。
    以下是构建和训练迁移学习模型的基本步骤:

  3. 初始化预训练模型

  4. 把最后一层的输出层改变成我们想要分的类别总数

  5. 定义一个optimizer来更新参数

  6. 模型训练

import numpy as np
import torchvision
from torchvision import datasets, transforms, models

import matplotlib.pyplot as plt
import time
import os
import copy
# Report the installed torchvision version.
# The recorded notebook run printed: Torchvision Version:  0.2.2
print("Torchvision Version: ", torchvision.__version__)

数据
我们会使用hymenoptera_data数据集.

这个数据集包括两类图片, bees 和 ants, 这些数据都被处理成了可以使用 ImageFolder (https://pytorch.org/docs/stable/torchvision/datasets.html#torchvision.datasets.ImageFolder) 来读取的格式。我们只需要把data_dir设置成数据的根目录,然后把model_name设置成我们想要使用的预训练模型: [resnet, alexnet, vgg, squeezenet, densenet, inception]

其他的参数有:

  1. num_classes表示数据集分类的类别数
  2. batch_size
  3. num_epochs
  4. feature_extract表示我们训练的时候使用fine tuning还是feature extraction方法。如果feature_extract = False,整个模型都会被同时更新。如果feature_extract = True,只有模型的最后一层被更新。
    1、查看数据,只是查看作用
# Top level data directory. Here we assume the format of the directory conforms
#   to the ImageFolder structure
data_dir = "./hymenoptera_data"
# Batch size for training (change depending on how much memory you have)
batch_size = 32
# Side length of the square crops fed to the network. It is defined here
# because the transforms below need it before initialize_model() (which also
# returns 224 for resnet) is called further down the file.
input_size = 224

# The bees/ants dataset is not downloaded automatically; it must be fetched
# manually and placed next to this code.
# os.path.join() builds the path <data_dir>/train.
all_imgs = datasets.ImageFolder(os.path.join(data_dir, "train"),
                                transforms.Compose([
        transforms.RandomResizedCrop(input_size),  # random crop resized to input_size
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
    ]))
# Batch the training images into an iterator of tensors.
loader = torch.utils.data.DataLoader(all_imgs, batch_size=batch_size, shuffle=True, num_workers=4)
img = next(iter(loader))[0]  # image tensor of the first batch
# Notebook-style inspection; the recorded run showed torch.Size([32, 3, 224, 224]).
img.shape
# ToPILImage converts a tensor or array back into a PIL image.
# Details: https://blog.csdn.net/qq_37385726/article/details/81811466
unloader = transforms.ToPILImage()

# Turn on matplotlib's interactive mode.
# Details: https://blog.csdn.net/SZuoDao/article/details/52973621
plt.ion()
#plt.ioff()

def imshow(tensor, title=None):
    """Display ``tensor`` as an image, optionally with a title.

    The tensor is copied to the CPU first so the caller's tensor is never
    modified, then converted with the module-level ``unloader``.
    """
    # squeeze(0) drops a leading dimension of size 1 (a fake batch
    # dimension); it is a no-op when the first dimension is larger than 1.
    picture = unloader(tensor.cpu().clone().squeeze(0))
    plt.imshow(picture)
    if title is not None:
        plt.title(title)
    # Short pause so the plot gets a chance to refresh.
    plt.pause(1)


# Show three sample images from the first batch.
plt.figure()
for sample_idx in (8, 9, 10):
    imshow(img[sample_idx], title='Image')

3、把训练集和验证集分batch转换成迭代器
现在我们知道了模型输入的size,我们就可以把数据预处理成相应的格式。

# Per-channel normalisation shared by both splits.
channel_normalize = transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])

# Training uses random crop + horizontal flip as augmentation; validation
# uses a deterministic resize + centre crop.
# NOTE: input_size must already be defined when this statement runs.
data_transforms = {
    "train": transforms.Compose([
        transforms.RandomResizedCrop(input_size),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        channel_normalize,
    ]),
    "val": transforms.Compose([
        transforms.Resize(input_size),
        transforms.CenterCrop(input_size),
        transforms.ToTensor(),
        channel_normalize,
    ]),
}

print("Initializing Datasets and Dataloaders...")

# One ImageFolder dataset per split, read from <data_dir>/<split>.
image_datasets = {}
for split in ['train', 'val']:
    image_datasets[split] = datasets.ImageFolder(
        os.path.join(data_dir, split), data_transforms[split])

# One DataLoader per split, stored under the same keys so that later code
# can look them up with "train" / "val".
dataloaders_dict = {}
for split in ['train', 'val']:
    dataloaders_dict[split] = torch.utils.data.DataLoader(
        image_datasets[split], batch_size=batch_size, shuffle=True, num_workers=4)

# Use the first GPU when one is available, otherwise the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Peek at a single training batch.
inputs, labels = next(iter(dataloaders_dict["train"]))
print(inputs.shape)
print(labels)
# Recorded output of the notebook run:
#   torch.Size([32, 3, 224, 224])
#   tensor([1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0,
#           0, 1, 1, 1, 0, 0, 1, 0])

# Print the label count of every batch; the last batch may be smaller than
# batch_size. The recorded run printed torch.Size([32]) seven times and then
# torch.Size([20]).
for inputs, labels in dataloaders_dict["train"]:
    print(labels.size())

4、加载resnet模型并修改全连接层

# Architecture to load; the options listed by the notes are
# [resnet, alexnet, vgg, squeezenet, densenet, inception].
model_name = "resnet"

# Training length and number of target classes.
num_epochs = 2
num_classes = 2

# True  -> feature extraction: only the reshaped (new) layer is updated.
# False -> fine-tuning: every parameter of the model is updated.
feature_extract = True
def set_parameter_requires_grad(model, feature_extracting):
    """Freeze all parameters of ``model`` when ``feature_extracting`` is truthy.

    With a falsy flag the model is left untouched.
    """
    if not feature_extracting:
        return
    for weight in model.parameters():
        # Frozen parameters receive no gradient and are therefore not updated.
        weight.requires_grad = False
def initialize_model(model_name, num_classes, feature_extract, use_pretrained=True):
    """Build a model whose final layer outputs ``num_classes`` values.

    Args:
        model_name: architecture to build; only ``"resnet"`` is implemented.
        num_classes: number of outputs of the replaced final linear layer.
        feature_extract: when truthy, all existing parameters are frozen
            before the final layer is replaced, so only the new layer trains.
        use_pretrained: forwarded to ``models.resnet18(pretrained=...)``.

    Returns:
        ``(model_ft, input_size)``: the model and the input image side
        length it is used with (224).

    Raises:
        ValueError: if ``model_name`` is not supported. Previously this case
            fell through to the ``return`` and failed with UnboundLocalError.
    """
    if model_name != "resnet":
        raise ValueError(
            "Unsupported model_name {!r}; only 'resnet' is implemented".format(model_name))

    model_ft = models.resnet18(pretrained=use_pretrained)

    # Freeze the loaded parameters first; the layer created below is new and
    # therefore keeps requires_grad=True.
    set_parameter_requires_grad(model_ft, feature_extract)

    # model_ft.fc is the final fully connected layer; keep its input width
    # and replace it with one that outputs num_classes values.
    num_ftrs = model_ft.fc.in_features
    model_ft.fc = nn.Linear(num_ftrs, num_classes)
    input_size = 224

    return model_ft, input_size
# Build the model described by the configuration and print its layers.
model_ft, input_size = initialize_model(
    model_name,
    num_classes,
    feature_extract,
    use_pretrained=True,
)
print(model_ft)

5、查看需要更新的参数、定义优化器

# Notebook-style inspection: named_parameters() yields tuples; the recorded
# run showed a length of 2 for the first one.
first_named = next(iter(model_ft.named_parameters()))
first_named
len(first_named)

# List the name of every parameter in the model.
for name, param in model_ft.named_parameters():
    print(name)
# Move the model to the selected device.
model_ft = model_ft.to(device)

# Collect the parameters that still require gradients. With feature
# extraction only those are handed to the optimizer; with fine-tuning the
# optimizer receives every parameter of the model.
trainable = [(name, param) for name, param in model_ft.named_parameters()
             if param.requires_grad]
if feature_extract:
    params_to_update = [param for _, param in trainable]
else:
    params_to_update = model_ft.parameters()

print("Params to learn:")
for name, _ in trainable:
    print("\t", name)
# The recorded notebook run listed: fc.weight, fc.bias

# Optimizer over the selected parameters, and the loss function.
optimizer_ft = optim.SGD(params_to_update, lr=0.001, momentum=0.9)
criterion = nn.CrossEntropyLoss()

6、定义训练模型

#训练测试合一起了
def train_model(model, dataloaders, criterion, optimizer, num_epochs=5):
    """Train and validate ``model``, keeping the weights with the best validation accuracy.

    Args:
        model: the network to train; batches are moved to the device of its
            first parameter (CPU if it has no parameters).
        dataloaders: mapping with the keys ``"train"`` and ``"val"``; each
            value yields ``(inputs, labels)`` batches and exposes a
            ``.dataset`` with a length.
        criterion: loss function called as ``criterion(outputs, labels)``;
            its result is treated as a per-batch mean.
        optimizer: optimizer used during the ``"train"`` phase.
        num_epochs: number of train+validation rounds.

    Returns:
        ``(model, val_acc_history)``: the model with the best-validation
        weights loaded, and the validation accuracy of every epoch.
    """
    since = time.time()
    # Follow the model's own device rather than depending on a module-level
    # ``device`` variable being defined and matching the model.
    first_param = next(model.parameters(), None)
    run_device = first_param.device if first_param is not None else torch.device("cpu")

    val_acc_history = []
    # Deep copy so that later training steps do not alter the snapshot.
    best_model_wts = copy.deepcopy(model.state_dict())
    best_acc = 0.
    for epoch in range(num_epochs):
        print("Epoch {}/{}".format(epoch, num_epochs-1))
        print("-"*10)

        for phase in ["train", "val"]:
            running_loss = 0.
            running_corrects = 0.
            if phase == "train":
                model.train()
            else:
                model.eval()

            for inputs, labels in dataloaders[phase]:
                inputs = inputs.to(run_device)
                labels = labels.to(run_device)

                # Gradients are only tracked during the training phase.
                with torch.autograd.set_grad_enabled(phase == "train"):
                    outputs = model(inputs)
                    loss = criterion(outputs, labels)

                # Index of the largest score in each row = predicted class.
                _, preds = torch.max(outputs, 1)
                if phase == "train":
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                # The loss is a batch mean; scale it back to a batch sum.
                running_loss += loss.item() * inputs.size(0)
                running_corrects += torch.sum(preds.view(-1) == labels.view(-1)).item()

            epoch_loss = running_loss / len(dataloaders[phase].dataset)
            epoch_acc = running_corrects / len(dataloaders[phase].dataset)

            print("{} Loss: {} Acc: {}".format(phase, epoch_loss, epoch_acc))
            if phase == "val" and epoch_acc > best_acc:
                # Validation improved: snapshot the current weights.
                best_acc = epoch_acc
                best_model_wts = copy.deepcopy(model.state_dict())

            if phase == "val":
                val_acc_history.append(epoch_acc)

        print()

    time_elapsed = time.time() - since
    print("Training complete in {}m {}s".format(time_elapsed // 60, time_elapsed % 60))
    print("Best val Acc: {}".format(best_acc))

    # Restore the best (not necessarily the latest) weights before returning.
    model.load_state_dict(best_model_wts)
    return model, val_acc_history

7、运行模型

# Train and evaluate the model built above.
model_ft, ohist = train_model(
    model_ft,
    dataloaders_dict,
    criterion,
    optimizer_ft,
    num_epochs=num_epochs,
)
# Notebook-style inspection of the validation accuracy history; the recorded
# run showed [0.8954248366013072, 0.9281045751633987].
ohist
model_ft
# Baseline: the same architecture with pretrained weights turned off and
# every parameter trainable.
scratch_model, _ = initialize_model(
    model_name, num_classes, feature_extract=False, use_pretrained=False)
scratch_model = scratch_model.to(device)

scratch_optimizer = optim.SGD(scratch_model.parameters(), lr=0.001, momentum=0.9)
scratch_criterion = nn.CrossEntropyLoss()

_, scratch_hist = train_model(
    scratch_model, dataloaders_dict, scratch_criterion, scratch_optimizer,
    num_epochs=num_epochs)
# Compare validation accuracy per epoch for the transfer-learning model and
# the model trained from scratch. Both histories are used as returned by
# train_model.
epoch_axis = range(1, num_epochs+1)

plt.title("Validation Accuracy vs. Number of Training Epochs")
plt.xlabel("Training Epochs")
plt.ylabel("Validation Accuracy")
for history, curve_label in ((ohist, "Pretrained"), (scratch_hist, "Scratch")):
    plt.plot(epoch_axis, history, label=curve_label)
plt.ylim((0,1.))
plt.xticks(np.arange(1, num_epochs+1, 1.0))
plt.legend()
plt.show()

在这里插入图片描述

  • 0
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值