深度学习 —— 个人学习笔记10(池化层、LeNet)

声明

  本文章为个人学习使用,版面观感若有不适请谅解,文中知识仅代表个人观点,若出现错误,欢迎各位批评指正。

二十一、池化层

1、 最大池化层和平均池化层

  与互相关运算符一样,汇聚窗口从输入张量的左上角开始,从左往右、从上往下地在输入张量内滑动。在汇聚窗口到达的每个位置,它计算该窗口中输入子张量的最大值或平均值。计算最大值或平均值是取决于使用了最大汇聚层还是平均汇聚层。

2、 torch.stack 和 torch.cat 的区别
  • 维度创建:
      torch.stack 会在堆叠时创建一个新的维度,将输入张量序列沿着这个新维度进行堆叠。这意味着堆叠后的张量的维度比输入张量序列的维度多一。
      torch.cat 不会引入新的维度,只在现有的某个维度上对输入张量进行拼接。

  • 拼接方式:
      torch.stack 会将输入张量序列按照指定维度进行逐个元素的堆叠,生成一个新的张量。这要求所有输入张量的形状必须相同。
      torch.cat 则会对输入张量进行连接,不关心元素的位置,只要各个张量的拼接维度匹配即可。这种连接方式更加灵活,因为它不要求所有输入张量的形状完全相同,只要在拼接的维度上尺寸一致即可。

3、 代码演示
import torch
from torch import nn

# Run on the first CUDA GPU when available, otherwise fall back to the CPU.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def pool2d(X, pool_size, mode='max'):
    """2-D pooling over a single-channel 2-D tensor (no padding, stride 1).

    Slides a (p_h, p_w) window over X and reduces each window to its max
    or mean, mirroring the textbook definition of a pooling layer.

    Args:
        X: 2-D tensor of shape (h, w).
        pool_size: tuple (p_h, p_w) — pooling window height and width.
        mode: 'max' for max-pooling, 'avg' for average-pooling.

    Returns:
        Tensor of shape (h - p_h + 1, w - p_w + 1) on the same device as X.
    """
    p_h, p_w = pool_size
    # FIX: allocate the output on X's own device instead of a module-level
    # global, and drop the per-element `.to(device)` calls — each one forced
    # a scalar host<->device transfer on every loop iteration.
    Y = torch.zeros((X.shape[0] - p_h + 1, X.shape[1] - p_w + 1),
                    device=X.device)
    for i in range(Y.shape[0]):
        for j in range(Y.shape[1]):
            window = X[i: i + p_h, j: j + p_w]
            if mode == 'max':
                Y[i, j] = window.max()
            elif mode == 'avg':
                Y[i, j] = window.mean()
    return Y


# --- pooling demos: hand-written pool2d vs. nn.MaxPool2d ---------------------
X = torch.tensor([[0.0, 1.0, 2.0], [3.0, 4.0, 5.0], [6.0, 7.0, 8.0]])
print("pool_size = (2, 2) 时 max : ", pool2d(X, (2, 2)))

print("pool_size = (2, 2) 时 avg : ", pool2d(X, (2, 2), 'avg'))

# nn pooling layers expect (N, C, H, W) input, hence the extra two dims.
X = torch.arange(16, dtype=torch.float32).reshape((1, 1, 4, 4)).to(device)
print(f'X : {X}')

# NOTE: rebinding `pool2d` here shadows the hand-written function above.
pool2d = nn.MaxPool2d(3)
print("pool = (3, 3) : ", pool2d(X))

pool2d = nn.MaxPool2d(3, padding=1, stride=2)
print("pool = (3, 3), padding = 1, stride = 2 : ", pool2d(X))

pool2d = nn.MaxPool2d((2, 3), stride=(2, 3), padding=(0, 1))
# BUG FIX: the label previously swapped the stride and padding values
# relative to the actual layer arguments on the line above.
print("pool = (2, 3), stride = (2, 3), padding = (0, 1) : ", pool2d(X))

##### Multiple channels #####
X = torch.cat((X, X + 1), 1)  # concatenate along dim 1 (the channel axis)
print(f'X : {X}')

pool2d = nn.MaxPool2d(3, padding=1, stride=2)
print("pool = (3, 3), padding = 1, stride = 2 : ", pool2d(X))

二十二、卷积神经网络(LeNet)

import torch
import torchvision
import time
from torch import nn
from IPython import display
import matplotlib.pyplot as plt
from matplotlib_inline import backend_inline
from torchvision import transforms
from torch.utils import data

# Select the first CUDA GPU if present, otherwise train on the CPU.
mydevice = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def accuracy(y_hat, y):
    """Return the number of correct predictions (as a float).

    Args:
        y_hat: predictions — either hard class ids, or a
               (batch, num_classes) score matrix, in which case the
               arg-max along axis 1 is taken first.
        y: ground-truth labels.

    Returns:
        float count of positions where the prediction equals the label.
    """
    preds = y_hat
    if len(preds.shape) > 1 and preds.shape[1] > 1:
        preds = preds.argmax(axis=1)
    # Cast to y's dtype before comparing so integer labels match cleanly.
    matches = preds.type(y.dtype) == y
    return float(matches.type(y.dtype).sum())

def evaluate_accuracy_gpu(net, data_iter, device=None):
    """Compute the accuracy of `net` over `data_iter`.

    If `device` is not given, evaluation runs on whatever device the
    model's parameters already live on.
    """
    if isinstance(net, nn.Module):
        net.eval()  # evaluation mode: no dropout, frozen batch-norm stats
        if not device:
            # Default to the device of the first model parameter.
            device = next(iter(net.parameters())).device
    metric = Accumulator(2)  # slot 0: correct predictions, slot 1: samples
    with torch.no_grad():
        for features, labels in data_iter:
            if isinstance(features, list):
                # Required for BERT fine-tuning (introduced later).
                features = [item.to(device) for item in features]
            else:
                features = features.to(device)
            labels = labels.to(device)
            metric.add(accuracy(net(features), labels), labels.numel())
    return metric[0] / metric[1]

def set_axes(axes, xlabel, ylabel, xlim, ylim, xscale, yscale, legend):
    """Configure labels, scales, limits, legend and grid of a matplotlib axes."""
    axes.set_xlabel(xlabel)
    axes.set_ylabel(ylabel)
    axes.set_xscale(xscale)
    axes.set_yscale(yscale)
    axes.set_xlim(xlim)
    axes.set_ylim(ylim)
    if legend:
        axes.legend(legend)
    axes.grid()

class Accumulator:
    """Maintain running sums over a fixed number of variables."""

    def __init__(self, n):
        # One float slot per tracked quantity.
        self.data = [0.0] * n

    def add(self, *args):
        """Fold the i-th argument into the i-th slot (element-wise add)."""
        paired = zip(self.data, args)
        self.data = [running + float(delta) for running, delta in paired]

    def reset(self):
        """Zero every slot, keeping the slot count."""
        self.data = [0.0] * len(self.data)

    def __getitem__(self, idx):
        return self.data[idx]

class Animator:
    """Incrementally draw data in an animation (used for live training curves)."""
    def __init__(self, xlabel=None, ylabel=None, legend=None, xlim=None,
                 ylim=None, xscale='linear', yscale='linear',
                 fmts=('-', 'm--', 'g-.', 'r:'), nrows=1, ncols=1,
                 figsize=(3.5, 2.5)):
        # Draw multiple lines incrementally.
        if legend is None:
            legend = []
        backend_inline.set_matplotlib_formats('svg')
        self.fig, self.axes = plt.subplots(nrows, ncols, figsize=figsize)
        if nrows * ncols == 1:
            # Normalize to a list so self.axes[0] works for a single subplot too.
            self.axes = [self.axes, ]
        # Capture the axis-configuration arguments in a lambda for reuse on redraw.
        self.config_axes = lambda: set_axes(
            self.axes[0], xlabel, ylabel, xlim, ylim, xscale, yscale, legend)
        self.X, self.Y, self.fmts = None, None, fmts

    def add(self, x, y):
        """Append data point(s) and redraw every line from scratch."""
        # Add multiple data points into the figure
        if not hasattr(y, "__len__"):
            y = [y]
        n = len(y)
        if not hasattr(x, "__len__"):
            x = [x] * n
        if not self.X:
            self.X = [[] for _ in range(n)]
        if not self.Y:
            self.Y = [[] for _ in range(n)]
        for i, (a, b) in enumerate(zip(x, y)):
            # Skip None placeholders so curves can be updated independently.
            if a is not None and b is not None:
                self.X[i].append(a)
                self.Y[i].append(b)
        self.axes[0].cla()
        for x, y, fmt in zip(self.X, self.Y, self.fmts):
            self.axes[0].plot(x, y, fmt)
        self.config_axes()
        display.display(self.fig)
        # The next two lines make the animation render inside PyCharm.
        plt.draw()
        plt.pause(interval=0.001)
        display.clear_output(wait=True)
        # Use a CJK-capable font so Chinese titles render correctly.
        plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']

class Timer:
    """Accumulate wall-clock durations across repeated start/stop cycles."""

    def __init__(self):
        self.times = []  # lengths of finished intervals, in seconds
        self.start()     # timing begins immediately on construction

    def start(self):
        """Mark the current moment as the start of a new interval."""
        self.tik = time.time()

    def stop(self):
        """Close the current interval, record its length and return it."""
        elapsed = time.time() - self.tik
        self.times.append(elapsed)
        return elapsed

    def sum(self):
        """Return the sum of time."""
        return sum(self.times)

def load_data_fashion_mnist(batch_size, resize=None):
    """Load Fashion-MNIST into train/test DataLoaders.

    The dataset is read from ``../data`` (``download=False``, so it must
    already be present there). If ``resize`` is given, images are resized
    before being converted to tensors.
    """
    steps = [transforms.ToTensor()]
    if resize:
        # Resize must run before ToTensor, hence the front insert.
        steps.insert(0, transforms.Resize(resize))
    pipeline = transforms.Compose(steps)
    train_set = torchvision.datasets.FashionMNIST(
        root="../data", train=True, transform=pipeline, download=False)
    test_set = torchvision.datasets.FashionMNIST(
        root="../data", train=False, transform=pipeline, download=False)
    train_loader = data.DataLoader(train_set, batch_size, shuffle=True,
                                   num_workers=4)
    test_loader = data.DataLoader(test_set, batch_size, shuffle=False,
                                  num_workers=4)
    return (train_loader, test_loader)


# Classic LeNet-5 (sigmoid activations, average pooling), adapted to
# Fashion-MNIST. Shape comments assume a (N, 1, 28, 28) input.
net = nn.Sequential(
    nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.Sigmoid(),  # -> (N, 6, 28, 28)
    nn.AvgPool2d(kernel_size=2, stride=2),                    # -> (N, 6, 14, 14)
    nn.Conv2d(6, 16, kernel_size=5), nn.Sigmoid(),            # -> (N, 16, 10, 10)
    nn.AvgPool2d(kernel_size=2, stride=2),                    # -> (N, 16, 5, 5)
    nn.Flatten(),                                             # -> (N, 400)
    nn.Linear(16 * 5 * 5, 120), nn.Sigmoid(),
    nn.Linear(120, 84), nn.Sigmoid(),
    nn.Linear(84, 10))                                        # 10 class logits

""" 尝试更换不同的激活函数及将平均池化层替换为最大池化层 """
net_demo = nn.Sequential(
    nn.Conv2d(1, 8, kernel_size=5, padding=2), nn.Tanh(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Conv2d(8, 32, kernel_size=5), nn.Tanh(),
    nn.MaxPool2d(kernel_size=2, stride=2),
    nn.Flatten(),
    nn.Linear(32 * 5 * 5, 128), nn.Tanh(),
    nn.Linear(128, 84), nn.Tanh(),
    nn.Linear(84, 10))

# Push one dummy sample through the network and print each layer's output
# shape — a quick sanity check of the architecture above.
X = torch.rand(size=(1, 1, 28, 28), dtype=torch.float32)
for blk in net:
    X = blk(X)
    print(blk.__class__.__name__, 'output shape: \t\t', X.shape)

batch_size = 256
train_iter, test_iter = load_data_fashion_mnist(batch_size=batch_size)

def train(net, train_iter, test_iter, num_epochs, lr, device):
    """Train `net` with SGD + cross-entropy, evaluating after each epoch.

    Loss/accuracy curves are drawn live via Animator; a summary title with
    throughput is shown at the end.

    Args:
        net: an nn.Module (Xavier-initialized in place before training).
        train_iter / test_iter: DataLoaders yielding (X, y) batches.
        num_epochs: number of passes over train_iter.
        lr: SGD learning rate.
        device: torch.device to train on.
    """
    def init_weights(m):
        # Xavier init on conv/linear layers for stable sigmoid activations.
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            nn.init.xavier_uniform_(m.weight)
    net.apply(init_weights)
    # BUG FIX: torch.cuda.get_device_name() raises when `device` is the CPU;
    # only query the CUDA device name when we actually run on a GPU.
    if device.type == 'cuda':
        print('training on', torch.cuda.get_device_name(device))
    else:
        print('training on', str(device))
    net.to(device)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()
    animator = Animator(xlabel='epoch', xlim=[1, num_epochs],
                            legend=['train loss', 'train acc', 'test acc'])
    timer, num_batches = Timer(), len(train_iter)
    # Plot ~5 times per epoch; max(1, ...) guards loaders with < 5 batches,
    # which previously caused a ZeroDivisionError.
    plot_every = max(1, num_batches // 5)
    for epoch in range(num_epochs):
        # Slots: summed loss, summed correct predictions, sample count.
        metric = Accumulator(3)
        net.train()
        for i, (X, y) in enumerate(train_iter):
            timer.start()
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
            with torch.no_grad():
                # CrossEntropyLoss averages over the batch; scale back up.
                metric.add(l * X.shape[0], accuracy(y_hat, y), X.shape[0])
            timer.stop()
            train_l = metric[0] / metric[2]
            train_acc = metric[1] / metric[2]
            if (i + 1) % plot_every == 0 or i == num_batches - 1:
                animator.add(epoch + (i + 1) / num_batches,
                             (train_l, train_acc, None))
        test_acc = evaluate_accuracy_gpu(net, test_iter)
        animator.add(epoch + 1, (None, None, test_acc))
    plt.title(f'loss {train_l:.3f}, train acc {train_acc:.3f}, test acc {test_acc:.3f}\n'
              f'{metric[2] * num_epochs / timer.sum():.1f} examples/sec on {str(device)}')
    plt.show()


# Learning rate and epoch count for the LeNet training run.
lr, num_epochs = 0.9, 10
train(net, train_iter, test_iter, num_epochs, lr, mydevice)


def show_images(imgs, num_rows, num_cols, titles=None, scale=1.5):
    """Display a grid of images with hidden axes.

    Args:
        imgs: sequence of images; torch tensors are detached and converted
              to numpy, anything already array-like is shown as-is.
        num_rows, num_cols: grid layout (extra images are ignored by zip).
        titles: optional title string applied via plt.title.
        scale: per-cell figure size multiplier.

    Returns:
        The flattened array of matplotlib axes.
    """
    figsize = (num_cols * scale, num_rows * scale)
    _, axes = plt.subplots(num_rows, num_cols, figsize=figsize)
    axes = axes.flatten()
    for ax, img in zip(axes, imgs):
        # FIX: the original built a fresh conversion lambda on every
        # iteration and swallowed all errors with a bare `except`; narrow
        # to AttributeError (raised when img has no .detach/.numpy, i.e. it
        # is already a plain array) and convert inline.
        try:
            img = img.detach().numpy()
        except AttributeError:
            pass
        ax.imshow(img)
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if titles:
            # CJK-capable font so Chinese titles render correctly.
            plt.rcParams['font.sans-serif'] = ['Microsoft YaHei']
            plt.title(titles)
    plt.show()
    return axes

def show_activate(net, train_iter, num_epochs, lr, device):
    """Briefly train `net`, then visualize the feature maps produced by the
    first and second activation layers for the last training batch.

    NOTE(review): assumes the last batch holds at least 9 samples and that
    the first/second activations yield 28x28 / 10x10 maps — true for the
    LeNet variants defined in this file; confirm before reusing elsewhere.
    """
    def init_weights(m):
        # Same Xavier initialization as in train().
        if type(m) == nn.Linear or type(m) == nn.Conv2d:
            nn.init.xavier_uniform_(m.weight)
    net.apply(init_weights)
    # BUG FIX: torch.cuda.get_device_name() raises for CPU devices.
    if device.type == 'cuda':
        print('training on', torch.cuda.get_device_name(device))
    else:
        print('training on', str(device))
    net.to(device)
    optimizer = torch.optim.SGD(net.parameters(), lr=lr)
    loss = nn.CrossEntropyLoss()
    # BUG FIX: removed a discarded `Animator(...)` call that only created an
    # unused blank figure, and a bare `torch.no_grad()` statement that
    # constructed a context manager without entering it — a silent no-op.
    for epoch in range(num_epochs):
        net.train()
        for i, (X, y) in enumerate(train_iter):
            optimizer.zero_grad()
            X, y = X.to(device), y.to(device)
            y_hat = net(X)
            l = loss(y_hat, y)
            l.backward()
            optimizer.step()
    # Channel 1 of the first 9 samples, after the first conv + activation.
    x_first_Sigmoid_layer = net[0:2](X)[0:9, 1, :, :]
    show_images(x_first_Sigmoid_layer.reshape(9, 28, 28).cpu().detach(), 1, 9, titles=f'第一次 {net[1]}')
    # Same samples after the second conv + activation (pre-pooling).
    x_second_Sigmoid_layer = net[0:5](X)[0:9, 1, :, :]
    show_images(x_second_Sigmoid_layer.reshape(9, 10, 10).cpu().detach(), 1, 9, titles=f'第二次 {net[4]}')

""" 可多尝试不同情况 """
show_activate(net_demo, train_iter, num_epochs, lr, mydevice)

net = nn.Sequential(
   nn.Conv2d(1, 6, kernel_size=5, padding=2), nn.Sigmoid(),
   nn.AvgPool2d(kernel_size=2, stride=2),
   nn.Conv2d(6, 16, kernel_size=5), nn.Sigmoid(),
   nn.AvgPool2d(kernel_size=2, stride=2),
   nn.Flatten(),
   nn.Linear(16 * 5 * 5, 120), nn.Sigmoid(),
   nn.Linear(120, 84), nn.Sigmoid(),
   nn.Linear(84, 10))

net_demo = nn.Sequential(
   nn.Conv2d(1, 8, kernel_size=5, padding=2), nn.Sigmoid(),
   nn.MaxPool2d(kernel_size=2, stride=2),
   nn.Conv2d(8, 32, kernel_size=5), nn.Sigmoid(),
   nn.MaxPool2d(kernel_size=2, stride=2),
   nn.Flatten(),
   nn.Linear(32 * 5 * 5, 128), nn.Sigmoid(),
   nn.Linear(128, 84), nn.Sigmoid(),
   nn.Linear(84, 10))

net_demo = nn.Sequential(
   nn.Conv2d(1, 8, kernel_size=5, padding=2), nn.Tanh(),
   nn.MaxPool2d(kernel_size=2, stride=2),
   nn.Conv2d(8, 32, kernel_size=5), nn.Sigmoid(),
   nn.MaxPool2d(kernel_size=2, stride=2),
   nn.Flatten(),
   nn.Linear(32 * 5 * 5, 128), nn.Sigmoid(),
   nn.Linear(128, 84), nn.Sigmoid(),
   nn.Linear(84, 10))


  文中部分知识参考:B 站 —— 跟李沐学AI;百度百科

  • 1
    点赞
  • 3
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值