深度学习之PyTorch物体检测实战，读书笔记（三）

最新推荐文章于 2024-09-21 12:55:07 发布

艾1

最新推荐文章于 2024-09-21 12:55:07 发布

阅读量373

点赞数 1

分类专栏： yTorch物体检测实战文章标签：深度学习 pytorch 神经网络 Powered by 金山文档

本文链接：https://blog.csdn.net/znsjsnsnsn/article/details/129188100

版权

yTorch物体检测实战专栏收录该内容

3 篇文章 0 订阅

订阅专栏

ResNet通过引入残差块解决了深度卷积网络中的梯度消失问题，允许网络更易于训练。文章介绍了ResNet的基本结构，包括BasicBlock和BottleNeck模块，以及如何在PyTorch中实现这些模块。此外，还展示了数据集的划分和预处理，以及训练模型的步骤，包括损失函数、优化器和训练过程。

摘要由CSDN通过智能技术生成

一.里程碑：ResNet

出现的问题：卷积网络不断加深以寻求更优越的性能，然而随着网络的加深，网络却越发难以训练，一方面会产生梯度消失现象；另一方面越深的网络返回的梯度相关性会越来越差，接近于白噪声，导致梯度更新也接近于随机扰动。

（总而言之，为了追求更好的性能，卷积网络越来越深，带来了两个问题：

首先：网络的深度越来越高，网络越来越难以训练，而且出现了梯度消失问题；

其次：深度越来越高，网络返回的梯度相关性越来越差）

解决方法：ResNet（Residual Network，残差网络）较好地解决了这个问题。

我们期望的网络最终映射为H(x)，左侧的网络需要直接拟合输出H(x)，而右侧由ResNet提出的子模块，通过引入一个shortcut（捷径）分支，将需要拟合的映射变为残差F(x)=H(x)-x，即子模块的输出为F(x)+x。相较于直接优化潜在映射H(x)，优化残差映射F(x)是更为容易的。

由于F(x)+x是逐通道进行相加，因此根据两者是否通道数相同，存在两种Bottleneck结构。对于通道数不同的情况，比如每个卷积组的第一个Bottleneck，需要利用1×1卷积对x进行Downsample操作，将通道数变为相同，再进行加操作。对于相同的情况下，两者可以直接进行相加。

a.搭建BasicBlock

import torch
import torch.nn as nn


# 搭建BasicBlock模块
class BasicBlocak(nn.Module):
    """Residual block made of two 3x3 convolutions plus a shortcut branch.

    Args:
        in_channle: Number of channels of the input feature map.
        out_channel: Number of channels produced by both convolutions.
        stride: Stride of the first convolution (the second always uses 1).
        downsample: Optional module applied to the input on the shortcut
            branch so that it can be added to the main-branch output. When
            ``None`` the input itself is added, so it must already have the
            same shape as the main-branch output.
    """

    # The block outputs out_channel * expansion channels (no expansion here).
    expansion = 1

    def __init__(self, in_channle, out_channel, stride=1, downsample=None):
        super(BasicBlocak, self).__init__()
        # No bias on the convolutions: each one is followed by BatchNorm,
        # whose mean subtraction cancels a constant bias.
        self.conv1 = nn.Conv2d(in_channels=in_channle, out_channels=out_channel, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        # BatchNorm sits between the convolution and the ReLU. It normalizes
        # each mini-batch so consecutive layers see consistently distributed
        # inputs, which eases training and helps against vanishing gradients.
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel, kernel_size=3, padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)

        self.downsample = downsample
        self.relu = nn.ReLU(inplace=True)

    def forward(self, X):
        """Return ``relu(F(X) + shortcut(X))``."""
        identity = X
        Y = self.relu(self.bn1(self.conv1(X)))
        Y = self.bn2(self.conv2(Y))
        if self.downsample is not None:
            # Project the input so it matches the main branch before adding.
            identity = self.downsample(X)
        return self.relu(Y + identity)


# Correctly spelled alias; the original (misspelled) name is kept for
# backward compatibility with existing callers.
BasicBlock = BasicBlocak

b.搭建BottleNeck

class BottleNeck(nn.Module):
    """Bottleneck residual block: 1x1 -> 3x3 -> 1x1 convolutions plus shortcut.

    Args:
        in_channel: Number of channels of the input feature map.
        out_channel: Number of channels used by the first two convolutions;
            the block outputs ``out_channel * expansion`` channels.
        stride: Stride of the 3x3 convolution.
        downsample: Optional module applied to the input on the shortcut
            branch so that it can be added to the main-branch output. When
            ``None`` the input itself is added, so it must already have the
            same shape as the main-branch output.
    """

    # The block's output has four times as many channels as out_channel.
    expansion = 4

    def __init__(self, in_channel, out_channel, stride=1, downsample=None):
        super(BottleNeck, self).__init__()
        # Every convolution is followed by BatchNorm, so no bias is used.
        self.conv1 = nn.Conv2d(in_channels=in_channel, out_channels=out_channel, kernel_size=1, bias=False)
        self.bn1 = nn.BatchNorm2d(out_channel)
        self.conv2 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel, kernel_size=3, stride=stride,
                               padding=1, bias=False)
        self.bn2 = nn.BatchNorm2d(out_channel)
        self.conv3 = nn.Conv2d(in_channels=out_channel, out_channels=out_channel * self.expansion, kernel_size=1,
                               bias=False)
        self.bn3 = nn.BatchNorm2d(out_channel * self.expansion)

        self.downsample = downsample
        self.relu = nn.ReLU(inplace=True)

    def forward(self, X):
        """Return ``relu(F(X) + shortcut(X))``."""
        identity = X
        Y = self.relu(self.bn1(self.conv1(X)))
        Y = self.relu(self.bn2(self.conv2(Y)))
        Y = self.bn3(self.conv3(Y))
        if self.downsample is not None:
            # Project the input so it matches the main branch before adding.
            identity = self.downsample(X)
        return self.relu(Y + identity)

.搭建 ResNet-layer的通用框架

class ResNet(nn.Module):
    """Generic ResNet backbone assembled from a residual block class.

    Args:
        residual: Block class used for every stage. It must expose an
            ``expansion`` class attribute and be callable as
            ``residual(in_channel, out_channel, stride=..., downsample=...)``.
        num_residual: Sequence of four ints: the number of blocks in the
            conv2, conv3, conv4 and conv5 stages respectively.
        num_class: Number of outputs of the final fully connected layer.
        include_top: When ``True`` the global average pooling and the fully
            connected classifier are appended. Set it to ``False`` to use the
            network as a feature extractor inside a larger model.
    """

    def __init__(self, residual, num_residual, num_class=1000, include_top=True):
        super(ResNet, self).__init__()
        # Running channel count; residual_block() updates it stage by stage.
        self.out_channel = 64
        self.include_top = include_top
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=self.out_channel, kernel_size=7, stride=2, padding=3,
                               bias=False)
        self.bn1 = nn.BatchNorm2d(self.out_channel)
        self.relu = nn.ReLU(inplace=True)
        self.maxpool = nn.MaxPool2d(kernel_size=3, stride=2, padding=1)
        self.conv2 = self.residual_block(residual, 64, num_residual[0])
        self.conv3 = self.residual_block(residual, 128, num_residual[1], stride=2)
        self.conv4 = self.residual_block(residual, 256, num_residual[2], stride=2)
        # The last stage uses the fourth entry (previously index 1 by mistake).
        self.conv5 = self.residual_block(residual, 512, num_residual[3], stride=2)
        if self.include_top:
            self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
            self.fc = nn.Linear(512 * residual.expansion, num_class)

        # Initialize convolution and normalization layers.
        for m in self.modules():
            if isinstance(m, nn.Conv2d):
                nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
            elif isinstance(m, (nn.BatchNorm2d, nn.GroupNorm)):
                nn.init.constant_(m.weight, 1)
                nn.init.constant_(m.bias, 0)

    def residual_block(self, residual, channel, num_residual, stride=1):
        """Build one stage made of ``num_residual`` consecutive blocks.

        Only the first block receives ``stride`` and, when the spatial size or
        the channel count changes, a 1x1-conv + BatchNorm ``downsample``
        module for its shortcut branch. ``self.out_channel`` is updated to the
        stage's output channel count.
        """
        downsample = None
        if stride != 1 or self.out_channel != channel * residual.expansion:
            downsample = nn.Sequential(
                nn.Conv2d(in_channels=self.out_channel, out_channels=channel * residual.expansion, kernel_size=1,
                          stride=stride, bias=False),
                nn.BatchNorm2d(channel * residual.expansion)
            )
        block = [residual(self.out_channel, channel, downsample=downsample, stride=stride)]
        self.out_channel = channel * residual.expansion

        for _ in range(1, num_residual):
            block.append(residual(self.out_channel, channel))

        return nn.Sequential(*block)

    def forward(self, X):
        """Run the backbone and, if ``include_top`` is set, the classifier head."""
        Y = self.relu(self.bn1(self.conv1(X)))
        Y = self.maxpool(Y)
        Y = self.conv5(self.conv4(self.conv3(self.conv2(Y))))
        if self.include_top:
            Y = self.avgpool(Y)
            Y = torch.flatten(Y, 1)
            Y = self.fc(Y)
        # The original forgot this return, so forward() always yielded None.
        return Y
# Visualize the network structure.
# The torchsummary package is mainly used to inspect a network's parameters
# and size.
from torchsummary import summary

net = resnet34()
# Device string fixed: "cude" is not a valid device type and made
# torch.device() raise whenever CUDA was available.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = net.to(device)
summary(model, (3, 224, 224))

.分离数据集代码

import os
import glob
import random
from PIL import Image

def _letterbox(img, target):
    """Scale *img* so its longer side equals *target* and centre it on a black square.

    The aspect ratio is preserved; the remaining area of the
    ``target x target`` RGB canvas stays black.
    """
    old_size = img.size
    # Scale factor derived from the longer side of the original image.
    ratio = float(target) / max(old_size)
    # Clamp to 1 px so extreme aspect ratios never request a zero-sized resize.
    new_size = tuple(max(1, int(x * ratio)) for x in old_size)

    # Image.ANTIALIAS was removed in Pillow 10; LANCZOS is the same filter.
    resized = img.resize(new_size, Image.LANCZOS)
    canvas = Image.new("RGB", (target, target))
    # Floor division centres the resized image on the canvas.
    canvas.paste(resized, ((target - new_size[0]) // 2, (target - new_size[1]) // 2))
    return canvas


def split_dataset(file_path='data_set/flower_photos', output_root='data_set', split_rate=0.1, resize_image=224):
    """Split a folder-per-class image dataset into train/test folders.

    Every image is converted to RGB, letterboxed to
    ``resize_image x resize_image`` and saved with a ``.jpg`` suffix under
    ``<output_root>/test/<class>`` (the first ``split_rate`` share of the
    shuffled files) or ``<output_root>/train/<class>`` (the rest).

    Args:
        file_path: Directory containing one sub-directory per class.
        output_root: Directory in which ``train`` and ``test`` are created.
        split_rate: Fraction of each class that goes to the test set.
        resize_image: Side length in pixels of the saved square images.
    """
    class_dirs = [d for d in glob.glob(os.path.join(file_path, '*')) if os.path.isdir(d)]
    print("Totally {} classes:{}".format(len(class_dirs), class_dirs))

    for class_dir in class_dirs:
        # Handle each class separately. basename works with both "/" and "\\".
        class_name = os.path.basename(os.path.normpath(class_dir))
        train_dir = os.path.join(output_root, 'train', class_name)
        test_dir = os.path.join(output_root, 'test', class_name)
        # exist_ok: do not raise if the directory already exists.
        os.makedirs(train_dir, exist_ok=True)
        os.makedirs(test_dir, exist_ok=True)

        # Collect every supported image file of this class.
        files = []
        for pattern in ('*.jpg', '*.jpeg', '*.png'):
            files += glob.glob(os.path.join(class_dir, pattern))
        random.shuffle(files)
        split_boundary = int(len(files) * split_rate)

        for i, file in enumerate(files):
            with Image.open(file) as raw:
                new_im = _letterbox(raw.convert('RGB'), resize_image)

            # The first split_rate share goes to test, the rest to train.
            target_dir = test_dir if i < split_boundary else train_dir
            # splitext keeps dots inside the file name ("a.b.png" -> "a.b").
            stem = os.path.splitext(os.path.basename(file))[0]
            new_im.save(os.path.join(target_dir, stem + '.jpg'))

    # Count the .jpg images in the resulting train and test sets (once, after
    # all classes have been processed).
    train_files = glob.glob(os.path.join(output_root, 'train', '*', '*.jpg'))
    test_files = glob.glob(os.path.join(output_root, 'test', '*', '*.jpg'))

    print("Totally {} files for train".format(len(train_files)))
    print("Totally {} files for test".format(len(test_files)))


if __name__ == '__main__':
    split_dataset()

.train.py

"""
# 训练脚本
#
"""

import os
import sys
import json
import time

import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as Data
from torchvision import transforms, datasets
from tqdm import tqdm

from model import resnet34


def train_model():
    """Fine-tune ``resnet34`` on the image folders under ``./data_set``.

    Reads ``data_set/train`` and ``data_set/test`` (one sub-folder per class)
    from the current working directory, writes the index-to-class mapping to
    ``class_indices.json``, loads ``resnet34-b627a593.pth`` as initial
    weights, trains for a fixed number of epochs and saves the weights with
    the best validation accuracy to ``resNet34.pth``.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
    print("Using {} device.".format(device))

    # Data preprocessing pipelines.
    data_transform = {
        "train": transforms.Compose([transforms.RandomResizedCrop(224),    # random crop
                                     transforms.RandomHorizontalFlip(),    # random horizontal flip
                                     transforms.ToTensor(),
                                     transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])]),
        "val": transforms.Compose([transforms.Resize(256),    # rescale
                                   transforms.CenterCrop(224),    # centre crop
                                   transforms.ToTensor(),
                                   transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])])}

    # Dataset root is the current working directory. os.path.join is used
    # instead of a hard-coded ".\\" so the script also runs outside Windows.
    data_root = os.path.abspath(os.getcwd())
    image_path = os.path.join(data_root, "data_set")
    assert os.path.exists(image_path), "{} path does not exist.".format(image_path)

    # ImageFolder expects the images organised as root/class/xxx.png.
    train_dataset = datasets.ImageFolder(root=os.path.join(image_path, "train"), transform=data_transform["train"])
    train_num = len(train_dataset)
    val_dataset = datasets.ImageFolder(root=os.path.join(image_path, "test"), transform=data_transform["val"])
    val_num = len(val_dataset)

    # e.g. {'daisy': 0, 'dandelion': 1, 'roses': 2, 'sunflower': 3, 'tulips': 4}
    flower_list = train_dataset.class_to_idx
    # Invert the mapping to index -> class name.
    class_dict = dict((val, key) for key, val in flower_list.items())
    num_classes = len(flower_list)

    # Persist the index -> class mapping as JSON.
    json_str = json.dumps(class_dict, indent=4)
    with open('class_indices.json', 'w') as json_file:
        json_file.write(json_str)

    # Original author's note: too large a batch size failed on their machine
    # with "OSError: [WinError 1455]" (page file too small).
    batch_size = 4
    # os.cpu_count() may return None, so fall back to 1.
    num_workers = min([os.cpu_count() or 1, batch_size if batch_size > 1 else 0, 8])
    print("Using {} dataloader workers every process.".format(num_workers))

    train_loader = Data.DataLoader(train_dataset, batch_size=batch_size,
                                   num_workers=num_workers, shuffle=True)
    # Accuracy does not depend on sample order, so validation is not shuffled.
    val_loader = Data.DataLoader(val_dataset, batch_size=batch_size,
                                 num_workers=num_workers, shuffle=False)
    print("Using {} train_images for training, {} test_images for validation.".format(train_num, val_num))
    print()

    # Load the pretrained weights.
    # download url: https://download.pytorch.org/models/resnet34-b627a593.pth
    net = resnet34()
    model_weight_path = os.path.join(".", "resnet34-b627a593.pth")
    assert os.path.exists(model_weight_path), "file {} does not exist.".format(model_weight_path)
    # NOTE(review): strict=False skips every key that does not match the model
    # without reporting it, so part of the checkpoint may not be loaded.
    # Inspect the return value of load_state_dict() to confirm what matched.
    net.load_state_dict(torch.load(model_weight_path, map_location='cpu'), strict=False)

    # Replace the classifier head so it outputs one score per dataset class
    # (previously hard-coded to 5).
    in_channel = net.fc.in_features
    net.fc = nn.Linear(in_channel, num_classes)
    net.to(device)

    loss_function = nn.CrossEntropyLoss()

    params = [p for p in net.parameters() if p.requires_grad]
    optimizer = optim.Adam(params, lr=0.0001)

    epochs = 10
    best_acc = 0.0
    save_path = os.path.join(".", "resNet34.pth")    # where the best weights are saved
    batch_num = len(train_loader)    # number of batches per epoch
    total_time = 0    # accumulated wall-clock time of all epochs

    for epoch in range(epochs):
        start_time = time.perf_counter()

        # train
        net.train()
        train_loss = 0.0
        train_bar = tqdm(train_loader, file=sys.stdout)    # progress bar over the batches

        for step, data in enumerate(train_bar):
            train_images, train_labels = data
            train_images = train_images.to(device)
            train_labels = train_labels.to(device)

            optimizer.zero_grad()    # clear gradients of the previous step
            outputs = net(train_images)    # forward pass
            loss = loss_function(outputs, train_labels)
            loss.backward()    # backward pass
            optimizer.step()    # parameter update
            batch_loss = loss.item()
            train_loss += batch_loss

            train_bar.desc = "train epoch[{}/{}] loss:{:.3f}.".format(epoch+1, epochs, batch_loss)

        # validate
        net.eval()
        val_acc = 0.0    # number of correctly classified validation samples
        val_bar = tqdm(val_loader, file=sys.stdout)

        with torch.no_grad():
            for val_data in val_bar:
                val_images, val_labels = val_data
                val_images = val_images.to(device)
                val_labels = val_labels.to(device)

                val_y = net(val_images)
                # Index of the highest score along dim 1 (dim 0 is the batch).
                predict_y = torch.max(val_y, dim=1)[1]
                val_acc += torch.eq(predict_y, val_labels).sum().item()

                val_bar.desc = "valid epoch[{}/{}].".format(epoch+1, epochs)

        # Report the epoch: mean training loss per batch and validation accuracy.
        val_accurate = val_acc / val_num
        print("[epoch {:.0f}] train_loss: {:.3f}  val_accuracy: {:.3f}"
              .format(epoch+1, train_loss/batch_num, val_accurate))

        epoch_time = time.perf_counter() - start_time
        print("epoch_time: {}".format(epoch_time))
        total_time += epoch_time
        print()

        # Keep only the weights with the best validation accuracy so far.
        if val_accurate > best_acc:
            best_acc = val_accurate
            torch.save(net.state_dict(), save_path)

    # Print the total training time as h:m:s.
    m, s = divmod(total_time, 60)
    h, m = divmod(m, 60)
    print("Total_time: {:.0f}:{:.0f}:{:.0f}".format(h, m, s))

    print('Finished Training!')


if __name__ == '__main__':
    train_model()

二.

a.定义一个名为my_func的函数，my_func函数接收两个int类型参数n和x，找出在1到n里的所有能被x整除的整数，用一个list数组列出这些整数，并让my_func函数返回这个数组以及这个数组的长度。

# Write your code here.
def my_func(n, x):
    """Return the integers in 1..n divisible by x, together with their count.

    Returns:
        A ``(multiples, count)`` tuple where ``multiples`` is the ascending
        list of matching integers and ``count`` is its length.
    """
    multiples = [value for value in range(1, n + 1) if value % x == 0]
    return multiples, len(multiples)