跟着沐神学深度学习-从入门到放弃的第3天(多层感知机实现)

最新推荐文章于 2024-11-06 20:38:13 发布

yanxiaoyu110

最新推荐文章于 2024-11-06 20:38:13 发布

阅读量80

点赞数

分类专栏： python 深度学习文章标签：深度学习人工智能

本文链接：https://blog.csdn.net/qq_36714950/article/details/134093127

版权

python 同时被 2 个专栏收录

17 篇文章 0 订阅

订阅专栏

深度学习

5 篇文章 0 订阅

订阅专栏

跟着沐神学深度学习-从入门到放弃的第3天(多层感知机实现)

原理：所谓多层感知机，其实就是将多个线性层进行串联，从而形成下图的形式
在这里插入图片描述
至于为什么要在多层感知机中加入激活函数，是因为如果没有激活函数，多个线性层叠加之后仍然只是一个仿射变换，多层网络其实只等价于一个大型的单层网络。为了发挥多层架构的潜力，我们还需要一个额外的关键要素：在仿射变换之后对每个隐藏单元应用非线性的激活函数。

代码实现：

import torch
from d2l.torch import d2l
from matplotlib import pyplot as plt

import myfun

# Load the Fashion-MNIST training and test iterators
train_iter, test_iter = myfun.load_data_fashion_mnist(batch_size=256)

# Initialize the model parameters (layer sizes: inputs, outputs, hidden units)
num_inputs, num_outputs, num_hiddens = 784, 10, 256
# The Gaussian weights are scaled down to std 0.01: unscaled standard-normal
# weights produce very large initial outputs, which makes the cross-entropy
# loss huge and SGD with lr=0.1 unstable.
# nn.Parameter already enables requires_grad, so it is not passed to randn/zeros.
w1 = torch.nn.Parameter(torch.randn(num_inputs, num_hiddens) * 0.01)
b1 = torch.nn.Parameter(torch.zeros(num_hiddens))
w2 = torch.nn.Parameter(torch.randn(num_hiddens, num_outputs) * 0.01)
b2 = torch.nn.Parameter(torch.zeros(num_outputs))
params = [w1, b1, w2, b2]


# Hand-written ReLU activation
def relu(x):
  """Return the element-wise maximum of ``x`` and zero."""
  return torch.max(x, torch.zeros_like(x))


# Model: one hidden layer with ReLU, followed by a linear output layer
def net(x):
  """Reshape ``x`` to rows of ``num_inputs`` features and return the output-layer values."""
  flat = x.reshape((-1, num_inputs))
  hidden = relu(flat @ w1 + b1)  # ``@`` is the matrix-multiplication operator
  return hidden @ w2 + b2

# Loss function
loss = torch.nn.CrossEntropyLoss()

# Optimizer: SGD over the hand-made parameter list
optimizer = torch.optim.SGD(params, lr=0.1)

# Train the model. train_ch3 returns the per-epoch metrics and prints nothing
# itself, so report them here instead of discarding the return value.
train_loss, train_accs, test_accs = myfun.train_ch3(
  net, train_iter, test_iter, loss, num_epochs=10, updater=optimizer)
for epoch, (epoch_loss, train_acc, test_acc) in enumerate(
    zip(train_loss, train_accs, test_accs), start=1):
  print(f'epoch {epoch}: loss {epoch_loss:.4f}, '
        f'train acc {train_acc:.3f}, test acc {test_acc:.3f}')

# Predict and display a few test images
myfun.predict_ch3(net, test_iter, n=6)

plt.show()

简洁实现

import torch
from d2l.torch import d2l
from matplotlib import pyplot as plt

import myfun

# Load the Fashion-MNIST training and test iterators
train_iter, test_iter = myfun.load_data_fashion_mnist(batch_size=256)

# Layer sizes (inputs, outputs, hidden units). The hand-made w1/b1/w2/b2
# tensors of the from-scratch version are not defined here: the Sequential
# model below creates and owns its own parameters, so those tensors were never
# used or trained in this script.
num_inputs, num_outputs, num_hiddens = 784, 10, 256


# Hand-written ReLU activation
def relu(x):
  """Return the element-wise maximum of ``x`` and zero."""
  return torch.max(x, torch.zeros_like(x))


# 定义模型(采用的是relu函数)
# def net(x):
#   x = x.reshape((-1, num_inputs))
#   h = relu(torch.matmul(x,w1)+b1)  # @代表矩阵乘法
#   return torch.matmul(h,w2)+b2

# Model: flatten -> hidden linear layer -> ReLU -> output linear layer
net = torch.nn.Sequential(
  torch.nn.Flatten(),
  torch.nn.Linear(num_inputs, num_hiddens),
  torch.nn.ReLU(),
  torch.nn.Linear(num_hiddens, num_outputs),
)


# Weight initialization
def init_weight(m):
  """Draw the weight of a ``torch.nn.Linear`` module from a normal distribution with std 0.01.

  Modules of any other type are left untouched, and only ``weight`` is
  re-initialized (the bias is not modified).
  """
  if type(m) is not torch.nn.Linear:
    return
  torch.nn.init.normal_(m.weight, std=0.01)


# Module.apply calls init_weight recursively on every sub-module of net (and on net itself)
net.apply(init_weight)

# Loss function
loss = torch.nn.CrossEntropyLoss()

# Optimizer: SGD over the parameters owned by the Sequential model
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)

# Train the model. train_ch3 returns the per-epoch metrics and prints nothing
# itself, so report them here instead of discarding the return value.
train_loss, train_accs, test_accs = myfun.train_ch3(
  net, train_iter, test_iter, loss, num_epochs=10, updater=optimizer)
for epoch, (epoch_loss, train_acc, test_acc) in enumerate(
    zip(train_loss, train_accs, test_accs), start=1):
  print(f'epoch {epoch}: loss {epoch_loss:.4f}, '
        f'train acc {train_acc:.3f}, test acc {test_acc:.3f}')

# Predict and display a few test images
myfun.predict_ch3(net, test_iter, n=6)

plt.show()

myfun函数封装（myfun.py）

import random
import time
import numpy as np
import torch
import torchvision.datasets
from torch.utils import data
from matplotlib import pyplot as plt  # 导入绘图模块
from torchvision import transforms  # 导入图像处理模块


def f(x):
  """Evaluate the quadratic 3x^2 - 4x at ``x``."""
  return 3 * x ** 2 - 4 * x


def calculus(f, x, h):
  """Return the forward-difference quotient (f(x + h) - f(x)) / h."""
  rise = f(x + h) - f(x)
  return rise / h


class Timer:  # @save
  """Record the durations of multiple timed runs."""

  def __init__(self):
    # Durations (in seconds) of every completed start/stop interval.
    self.times = []
    self.start()

  def start(self):
    """Start (or restart) the timer."""
    # perf_counter is monotonic; time.time() follows the wall clock, which can
    # be adjusted while timing and produce skewed or negative durations.
    self.tik = time.perf_counter()

  def stop(self):
    """Stop the timer, append the elapsed time to ``times`` and return it."""
    self.times.append(time.perf_counter() - self.tik)
    return self.times[-1]

  def avg(self):
    """Return the mean recorded duration, or 0.0 when nothing has been recorded."""
    if not self.times:
      return 0.0
    return sum(self.times) / len(self.times)

  def sum(self):
    """Return the total of all recorded durations."""
    return sum(self.times)

  def cumsum(self):
    """Return the running totals of the recorded durations as a list."""
    return np.array(self.times).cumsum().tolist()


# Generate synthetic observations of a linear model: y = xw + b + noise
def synthetic_data(w, b, num_examples):
  """Sample standard-normal features and noisy linear targets.

  Returns ``(x, y)`` where ``x`` has shape ``(num_examples, len(w))`` and ``y``
  is ``x @ w + b`` plus Gaussian noise (std 0.01), reshaped to a single column.
  """
  features = torch.normal(0, 1, (num_examples, len(w)))
  clean = torch.matmul(features, w) + b
  noisy = clean + torch.normal(0, 0.01, clean.shape)
  return features, noisy.reshape((-1, 1))


def data_iter(feature, lable, batch_size):
  """Yield ``(feature, lable)`` mini-batches in a freshly shuffled order.

  Every sample is visited exactly once; the last batch holds fewer than
  ``batch_size`` samples when the total is not a multiple of it.
  """
  total = len(feature)
  order = list(range(total))
  random.shuffle(order)  # random sampling order for this pass
  for start in range(0, total, batch_size):
    # Slicing past the end of the list is clamped, so no explicit bound is needed.
    picked = torch.tensor(order[start:start + batch_size])
    yield feature[picked], lable[picked]


def linreg(x, w, b):
  """Linear regression model: return ``x @ w + b``."""
  return x @ w + b


def square_loss(y_hat, y):
  """Return the element-wise halved squared error, with ``y`` reshaped to ``y_hat``'s shape."""
  diff = y_hat - y.reshape(y_hat.shape)
  return diff ** 2 / 2


def sgd(params, lr, batch_size):  # parameter update function
  """Apply one mini-batch SGD step in place and reset the gradients.

  Args:
    params: iterable of tensors to update.
    lr: learning rate.
    batch_size: divisor applied to each gradient before the step.

  Parameters whose ``.grad`` is ``None`` (no gradient was computed for them)
  are skipped instead of raising.
  """
  # no_grad keeps the in-place update itself out of the autograd graph.
  with torch.no_grad():
    for param in params:
      if param.grad is None:
        continue
      param -= lr * param.grad / batch_size
      param.grad.zero_()


def loadarry(data_arrays, batch_size, is_train=True):
  """Wrap the given tensors in a TensorDataset and return a DataLoader over it.

  The loader shuffles the samples when ``is_train`` is true.
  """
  return data.DataLoader(data.TensorDataset(*data_arrays), batch_size, shuffle=is_train)


def get_fashion_mnist_labels(labels):
  """Map numeric Fashion-MNIST class indices to their text labels."""
  names = ('t-shirt', 'trouser', 'pullover', 'dress', 'coat',
           'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot')
  return [names[index] for index in map(int, labels)]


def show_images(imgs, num_rows, num_cols, titles=None, scale=3):
  """Plot a list of images on a ``num_rows`` x ``num_cols`` grid.

  Args:
    imgs: images to draw; tensors are converted to NumPy arrays, anything
      else is passed to ``imshow`` unchanged.
    num_rows: number of grid rows.
    num_cols: number of grid columns.
    titles: optional sequence of per-image titles, indexed like ``imgs``.
    scale: size of each grid cell, used to compute the figure size.

  Returns:
    The flattened array of Axes.
  """
  figsize = (num_cols * scale, num_rows * scale)
  # squeeze=False always yields a 2-D array of Axes; with the default, a 1x1
  # grid returns a bare Axes object and the flatten() call below fails.
  _, axes = plt.subplots(num_rows, num_cols, figsize=figsize, squeeze=False)
  axes = axes.flatten()
  for i, (ax, img) in enumerate(zip(axes, imgs)):
    if torch.is_tensor(img):
      # Tensor image: detach/cpu are no-ops for plain CPU tensors and make
      # .numpy() work for tensors that track gradients or live on another device.
      ax.imshow(img.detach().cpu().numpy())
    else:
      # Non-tensor image (e.g. a PIL image)
      ax.imshow(img)
    # Hide the axis ticks
    ax.axes.get_xaxis().set_visible(False)
    ax.axes.get_yaxis().set_visible(False)
    if titles:
      ax.set_title(titles[i])
  return axes


def get_dataloader_workers():  # @save
  """Return the ``num_workers`` value used for the data loaders (currently 0)."""
  num_workers = 0
  return num_workers


def load_data_fashion_mnist(batch_size, resize=None):  # @save
  """Download Fashion-MNIST into ``./data`` and return ``(train_loader, test_loader)``.

  When ``resize`` is given, images are resized before being converted to
  tensors. The training loader shuffles its samples, the test loader does not.
  """
  if resize:
    steps = [transforms.Resize(resize), transforms.ToTensor()]
  else:
    steps = [transforms.ToTensor()]
  pipeline = transforms.Compose(steps)
  train_set = torchvision.datasets.FashionMNIST(
    root="./data", train=True, transform=pipeline, download=True)
  test_set = torchvision.datasets.FashionMNIST(
    root="./data", train=False, transform=pipeline, download=True)
  # Read the data in mini-batches of batch_size samples
  train_loader = data.DataLoader(train_set, batch_size, shuffle=True,
                                 num_workers=get_dataloader_workers())
  test_loader = data.DataLoader(test_set, batch_size, shuffle=False,
                                num_workers=get_dataloader_workers())
  return train_loader, test_loader


def softmax(x):
  """Normalize every row of a 2-D tensor into a probability distribution.

  Args:
    x: tensor whose dimension 1 holds the scores of one sample.

  Returns:
    A tensor of the same shape whose rows are non-negative and sum to 1.
  """
  # Subtracting the row maximum leaves the result mathematically unchanged but
  # keeps exp() from overflowing to inf (and the division from yielding NaN).
  shifted = x - x.max(dim=1, keepdim=True).values
  x_exp = torch.exp(shifted)
  partition = x_exp.sum(dim=1, keepdim=True)
  return x_exp / partition  # broadcasting divides each row by its own sum


# Cross-entropy loss
def cross_entropy(y_hat, y):
  """Return, per row of ``y_hat``, the negative log of the entry selected by ``y``."""
  rows = range(len(y_hat))
  picked = y_hat[rows, y]
  return -torch.log(picked)


def accuracy(y_hat, y):
  """Return the number of correct predictions as a float."""
  # A matrix with more than one column is reduced to the index of each row's maximum.
  has_class_columns = y_hat.dim() > 1 and y_hat.shape[1] > 1
  if has_class_columns:
    y_hat = y_hat.argmax(dim=1)
  # Cast the predictions to y's dtype before comparing element-wise.
  hits = y_hat.type(y.dtype) == y
  return float(hits.type(y.dtype).sum())


class Accumulator:
  """Keep running sums over ``n`` variables."""

  def __init__(self, n):
    # One float slot per tracked variable, all starting at zero.
    self.data = [0.0 for _ in range(n)]

  def add(self, *args):
    """Add each positional argument to the slot at the same position."""
    self.data = [total + float(value) for total, value in zip(self.data, args)]

  def reset(self):
    """Set every slot back to zero."""
    self.data = [0.0 for _ in self.data]

  def __getitem__(self, idx):
    """Return the running sum stored in slot ``idx``."""
    return self.data[idx]


def evaluate_accuracy(net, data_iter):
  """Return the fraction of samples in ``data_iter`` that ``net`` predicts correctly."""
  if isinstance(net, torch.nn.Module):
    net.eval()  # switch built-in modules to evaluation mode
  totals = Accumulator(2)  # slots: correct predictions, samples seen
  with torch.no_grad():
    for features, labels in data_iter:
      predictions = net(features)
      totals.add(accuracy(predictions, labels), labels.numel())
  correct, seen = totals[0], totals[1]
  return correct / seen


# *******核心代码********
def train_epoch_ch3(net, train_iter, loss, updater):
  """Train ``net`` for one pass over ``train_iter``.

  Args:
    net: the model, either a ``torch.nn.Module`` or a plain callable.
    train_iter: iterable of ``(x, y)`` mini-batches.
    loss: callable ``loss(y_hat, y)``; may return a scalar (treated as the
      batch mean) or one value per sample.
    updater: a ``torch.optim.Optimizer``, or a callable taking the batch size.

  Returns:
    ``(average loss per sample, training accuracy)`` for this pass.
  """
  if isinstance(net, torch.nn.Module):
    net.train()
  metric = Accumulator(3)  # slots: summed loss, correct predictions, samples seen
  for x, y in train_iter:
    y_hat = net(x)
    l = loss(y_hat, y)
    if isinstance(updater, torch.optim.Optimizer):
      updater.zero_grad()
      # mean() is a no-op for a scalar loss and reduces a per-sample loss, so
      # backward() works for both (it requires a scalar).
      l.mean().backward()
      updater.step()
      if l.dim() == 0:
        # Scalar loss: scale the batch mean back up to a batch total.
        batch_loss = float(l) * y.numel()
      else:
        batch_loss = float(l.sum())
    else:
      l.sum().backward()
      updater(x.shape[0])
      batch_loss = float(l.sum())
    metric.add(batch_loss, accuracy(y_hat, y), y.numel())
  return metric[0] / metric[2], metric[1] / metric[2]


# *******************************


def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
  """Train ``net`` for ``num_epochs`` epochs, evaluating on ``test_iter`` after each one.

  Returns three lists with one entry per epoch: training loss, training
  accuracy and test accuracy.
  """
  losses, train_accuracies, test_accuracies = [], [], []
  for _ in range(num_epochs):
    epoch_loss, epoch_train_acc = train_epoch_ch3(net, train_iter, loss, updater)
    epoch_test_acc = evaluate_accuracy(net, test_iter)
    losses.append(epoch_loss)
    train_accuracies.append(epoch_train_acc)
    test_accuracies.append(epoch_test_acc)
  return losses, train_accuracies, test_accuracies


def predict_ch3(net, test_iter, n=6):
  """Show the first ``n`` images of the first test batch with true and predicted labels.

  Args:
    net: the trained model; called on the whole first batch.
    test_iter: iterable of ``(x, y)`` batches; only the first batch is used.
    n: number of images to display, capped at the size of that batch.

  Each title holds the true label on the first line and the predicted label
  on the second.
  """
  x, y = next(iter(test_iter))
  # Never ask for more images than the batch holds, otherwise the reshape fails.
  n = min(n, len(x))
  # Inference only: skip building the autograd graph.
  with torch.no_grad():
    predicted = net(x).argmax(axis=1)
  trues = get_fashion_mnist_labels(y)
  preds = get_fashion_mnist_labels(predicted)
  titles = [true + '\n' + pred for true, pred in zip(trues, preds)]
  # Use the images' own height/width (the last two dimensions) rather than a
  # hard-coded 28x28, so resized inputs can be displayed as well.
  height, width = x.shape[-2:]
  show_images(x[0:n].reshape((n, height, width)), 1, n, titles=titles[0:n])