跟着沐神学深度学习-从入门到放弃的第3天(多层感知机实现)
原理:所谓多层感知机,其实就是将多个线性层进行串联,从而形成下图的形式
至于为什么要在多层感知机中加入激活函数:如果没有激活函数,多个线性层串联之后仍然等价于一个单层的线性(仿射)变换,层数再多也不会带来额外的表达能力。
因此,为了发挥多层架构的潜力,我们还需要一个额外的关键要素:在仿射变换之后对每个隐藏单元应用非线性的激活函数。
代码实现:
import torch
from d2l.torch import d2l
from matplotlib import pyplot as plt
import myfun
# Load the Fashion-MNIST training and test iterators in mini-batches of 256.
train_iter, test_iter = myfun.load_data_fashion_mnist(batch_size=256)
# Parameters of a one-hidden-layer MLP: 784 input features, 256 hidden units,
# 10 output classes.
num_inputs, num_outputs, num_hiddens = 784, 10, 256
# Draw the weights from a Gaussian scaled to std 0.01 (biases start at zero).
# Unscaled randn weights (std 1) make the initial logits very large, which
# hurts training with lr=0.1; 0.01 also matches the std used by the concise
# nn.Sequential version of this model.
# nn.Parameter marks each tensor as trainable, so requires_grad is implied.
w1 = torch.nn.Parameter(torch.randn(num_inputs, num_hiddens) * 0.01)
b1 = torch.nn.Parameter(torch.zeros(num_hiddens))
w2 = torch.nn.Parameter(torch.randn(num_hiddens, num_outputs) * 0.01)
b2 = torch.nn.Parameter(torch.zeros(num_outputs))
params = [w1, b1, w2, b2]
# ReLU activation.
def relu(x):
    """Return the element-wise maximum of ``x`` and zero."""
    return torch.max(x, torch.zeros_like(x))
# Model: a single hidden layer followed by a ReLU activation.
def net(x):
    """Map a batch of inputs to class logits.

    The input is flattened to ``(-1, num_inputs)``, passed through the hidden
    affine layer and ReLU, then through the output affine layer. No softmax
    is applied here.
    """
    flat = x.reshape((-1, num_inputs))
    hidden = relu(flat @ w1 + b1)  # ``@`` is matrix multiplication
    return hidden @ w2 + b2
# Loss: cross-entropy applied to the raw logits returned by net.
loss = torch.nn.CrossEntropyLoss()
# Optimizer: mini-batch SGD over the hand-built parameter list.
optimizer = torch.optim.SGD(params=params, lr=0.1)
# Train for 10 epochs.
myfun.train_ch3(
    net=net,
    train_iter=train_iter,
    test_iter=test_iter,
    loss=loss,
    num_epochs=10,
    updater=optimizer,
)
# Plot true vs. predicted labels for the first 6 test images.
myfun.predict_ch3(net=net, test_iter=test_iter, n=6)
plt.show()
简洁实现
import torch
from d2l.torch import d2l
from matplotlib import pyplot as plt
import myfun
# Load the Fashion-MNIST training and test iterators in mini-batches of 256.
train_iter, test_iter = myfun.load_data_fashion_mnist(batch_size=256)
# Layer sizes: input features, output classes, hidden units.
num_inputs, num_outputs, num_hiddens = 784, 10, 256
# Hand-built parameters carried over from the from-scratch version.
# NOTE(review): these look unused in this section -- the optimizer below is
# built from net.parameters() -- confirm before removing.
# nn.Parameter marks each tensor as trainable, so requires_grad is implied.
w1 = torch.nn.Parameter(torch.randn(num_inputs, num_hiddens))
b1 = torch.nn.Parameter(torch.zeros(num_hiddens))
w2 = torch.nn.Parameter(torch.randn(num_hiddens, num_outputs))
b2 = torch.nn.Parameter(torch.zeros(num_outputs))
params = [w1, b1, w2, b2]
# ReLU activation.
def relu(x):
    """Return the element-wise maximum of ``x`` and zero."""
    zeros = torch.zeros_like(x)
    clipped = torch.max(x, zeros)
    return clipped
# 定义模型(采用的是relu函数)
# def net(x):
# x = x.reshape((-1, num_inputs))
# h = relu(torch.matmul(x,w1)+b1) # @代表矩阵乘法
# return torch.matmul(h,w2)+b2
# Model: flatten -> hidden linear layer -> ReLU -> output linear layer.
net = torch.nn.Sequential(
    *[
        torch.nn.Flatten(),
        torch.nn.Linear(num_inputs, num_hiddens),
        torch.nn.ReLU(),
        torch.nn.Linear(num_hiddens, num_outputs),
    ]
)
# Weight initialisation.
def init_weight(m):
    """Re-initialise the weight of a linear layer from N(0, 0.01**2).

    Intended to be passed to ``Module.apply``, which calls it once per
    sub-module. Modules that are not linear layers are left untouched, and
    biases are not modified.

    Args:
        m: The module currently being visited.
    """
    # isinstance (rather than an exact type comparison) also covers
    # subclasses of nn.Linear.
    if isinstance(m, torch.nn.Linear):
        torch.nn.init.normal_(m.weight, std=0.01)
# Module.apply visits every sub-module of net recursively and calls the given
# function on each of them.
net.apply(init_weight)
# Loss: cross-entropy applied to the raw logits produced by net.
loss = torch.nn.CrossEntropyLoss()
# Optimizer: mini-batch SGD over all parameters registered in net.
optimizer = torch.optim.SGD(params=net.parameters(), lr=0.1)
# Train for 10 epochs.
myfun.train_ch3(
    net=net,
    train_iter=train_iter,
    test_iter=test_iter,
    loss=loss,
    num_epochs=10,
    updater=optimizer,
)
# Plot true vs. predicted labels for the first 6 test images.
myfun.predict_ch3(net=net, test_iter=test_iter, n=6)
plt.show()
myfun函数封装(myfun.py)
import random
import time
import numpy as np
import torch
import torchvision.datasets
from torch.utils import data
from matplotlib import pyplot as plt # 导入绘图模块
from torchvision import transforms # 导入图像处理模块
def f(x):
    """Evaluate the quadratic ``3 * x**2 - 4 * x``."""
    quadratic_term = 3 * x ** 2
    linear_term = 4 * x
    return quadratic_term - linear_term
def calculus(f, x, h):
    """Return the forward difference quotient of ``f`` at ``x`` with step ``h``.

    Computes ``(f(x + h) - f(x)) / h``, a numerical estimate of the derivative.
    """
    ahead = f(x + h)
    here = f(x)
    return (ahead - here) / h
class Timer:  # @save
    """Record the durations of multiple timed runs."""

    def __init__(self):
        # One entry per completed start/stop interval, in seconds.
        self.times = []
        self.start()

    def start(self):
        """Start (or restart) the timer."""
        self.tik = time.time()

    def stop(self):
        """Stop the timer, store the elapsed time in the list and return it."""
        elapsed = time.time() - self.tik
        self.times.append(elapsed)
        return elapsed

    def avg(self):
        """Return the mean of the recorded times."""
        return self.sum() / len(self.times)

    def sum(self):
        """Return the total of the recorded times."""
        # Inside a method, ``sum`` resolves to the builtin, not this method.
        return sum(self.times)

    def cumsum(self):
        """Return the running total of the recorded times as a list."""
        running = np.array(self.times).cumsum()
        return running.tolist()
# Generate a synthetic linear-regression dataset with noisy observations.
def synthetic_data(w, b, num_examples):
    """Sample features and noisy targets ``y = x @ w + b + noise``.

    Args:
        w: Weight vector; its length sets the number of features.
        b: Bias added to every target.
        num_examples: Number of rows to generate.

    Returns:
        ``(x, y)`` where ``x`` has shape ``(num_examples, len(w))`` and ``y``
        is reshaped to a single column.
    """
    feature_shape = (num_examples, len(w))
    x = torch.normal(0, 1, feature_shape)
    clean = torch.matmul(x, w) + b
    noise = torch.normal(0, 0.01, clean.shape)
    return x, (clean + noise).reshape((-1, 1))
def data_iter(feature, lable, batch_size):
    """Yield shuffled mini-batches of ``(feature, lable)``.

    The example indices are shuffled once, then walked in steps of
    ``batch_size``; the last batch may be smaller.
    """
    total = len(feature)
    order = list(range(total))
    random.shuffle(order)  # random sampling order
    for start in range(0, total, batch_size):
        # Slicing clamps at the end of the list, so no explicit min() is needed.
        picked = torch.tensor(order[start:start + batch_size])
        yield feature[picked], lable[picked]
def linreg(x, w, b):
    """Linear regression model: return ``x @ w + b``."""
    projection = x @ w
    return projection + b
def square_loss(y_hat, y):
    """Return the element-wise halved squared error ``(y_hat - y)**2 / 2``.

    ``y`` is reshaped to the shape of ``y_hat`` before subtracting; no
    reduction is applied.
    """
    residual = y_hat - torch.reshape(y, y_hat.shape)
    return residual ** 2 / 2
def sgd(params, lr, batch_size):
    """Apply one mini-batch SGD step in place and clear the gradients.

    Each parameter is decreased by ``lr * grad / batch_size``, after which its
    gradient is reset to zero.
    """
    # The update itself must not be recorded by autograd.
    with torch.no_grad():
        for param in params:
            step = lr * param.grad / batch_size
            param.sub_(step)
            param.grad.zero_()
def loadarry(data_arrays, batch_size, is_train=True):
    """Wrap tensors in a ``TensorDataset`` and return a ``DataLoader`` over it.

    Args:
        data_arrays: Tensors unpacked into ``TensorDataset``.
        batch_size: Number of examples per batch.
        is_train: Passed as ``shuffle``; batches are shuffled when true.
    """
    tensor_dataset = data.TensorDataset(*data_arrays)
    loader = data.DataLoader(tensor_dataset, batch_size, shuffle=is_train)
    return loader
def get_fashion_mnist_labels(labels):
    """Return the text labels of the Fashion-MNIST dataset for numeric labels."""
    text_labels = [
        't-shirt', 'trouser', 'pullover', 'dress', 'coat',
        'sandal', 'shirt', 'sneaker', 'bag', 'ankle boot',
    ]
    names = []
    for label in labels:
        names.append(text_labels[int(label)])
    return names
def show_images(imgs, num_rows, num_cols, titles=None, scale=3):
    """Draw a list of images on a ``num_rows`` x ``num_cols`` grid of axes.

    Args:
        imgs: Iterable of images; each is either a tensor or something
            ``imshow`` accepts directly (e.g. a PIL image).
        num_rows: Number of grid rows.
        num_cols: Number of grid columns.
        titles: Optional sequence of titles, indexed like ``imgs``.
        scale: Size of each cell; the figure is
            ``(num_cols * scale, num_rows * scale)``.

    Returns:
        The flattened array of axes.
    """
    figsize = (num_cols * scale, num_rows * scale)
    # squeeze=False makes subplots always return a 2-D array of axes, so
    # flatten() also works for a 1x1 grid (the default returns a bare Axes
    # object there, which has no flatten()).
    _, axes = plt.subplots(num_rows, num_cols, figsize=figsize, squeeze=False)
    axes = axes.flatten()
    for i, (ax, img) in enumerate(zip(axes, imgs)):
        if torch.is_tensor(img):
            # Tensor image: detach and move to CPU so numpy() also works for
            # tensors that require grad or live on another device.
            ax.imshow(img.detach().cpu().numpy())
        else:
            # PIL image
            ax.imshow(img)
        # Hide the tick marks on both axes.
        ax.axes.get_xaxis().set_visible(False)
        ax.axes.get_yaxis().set_visible(False)
        if titles:
            ax.set_title(titles[i])
    return axes
def get_dataloader_workers():  # @save
    """Return the number of worker processes used for data loading.

    The value is 0, i.e. no extra worker processes are requested.
    """
    num_workers = 0
    return num_workers
def load_data_fashion_mnist(batch_size, resize=None):  # @save
    """Download Fashion-MNIST and return ``(train_loader, test_loader)``.

    Args:
        batch_size: Number of examples per batch for both loaders.
        resize: Optional size; when given, a ``Resize`` transform is applied
            before ``ToTensor``.

    The data is stored under ``./data``. Only the training loader shuffles.
    """
    steps = [transforms.ToTensor()]
    if resize:
        steps = [transforms.Resize(resize)] + steps
    trans = transforms.Compose(steps)
    mnist_train = torchvision.datasets.FashionMNIST(
        root="./data", train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root="./data", train=False, transform=trans, download=True)
    workers = get_dataloader_workers()
    train_loader = data.DataLoader(
        mnist_train, batch_size, shuffle=True, num_workers=workers)
    test_loader = data.DataLoader(
        mnist_test, batch_size, shuffle=False, num_workers=workers)
    return train_loader, test_loader
def softmax(x):
    """Normalise each row of ``x`` into a probability distribution.

    Args:
        x: 2-D tensor of scores, one row per example.

    Returns:
        Tensor of the same shape whose rows are non-negative and sum to 1.
    """
    if x.numel() == 0:
        # Nothing to normalise; also avoids taking a max over an empty dim.
        return torch.exp(x)
    # Softmax is unchanged by subtracting a per-row constant. Subtracting the
    # row maximum keeps exp() from overflowing to inf (and the result from
    # becoming NaN) for large scores.
    shifted = x - x.max(dim=1, keepdim=True).values
    x_exp = torch.exp(shifted)
    partition = x_exp.sum(dim=1, keepdim=True)
    return x_exp / partition  # broadcasting divides every row by its sum
# Cross-entropy loss.
def cross_entropy(y_hat, y):
    """Return the negative log of the probability predicted for each true class.

    For every row ``i`` of ``y_hat`` the entry at column ``y[i]`` is selected;
    no reduction is applied.
    """
    rows = range(len(y_hat))
    true_class_prob = y_hat[rows, y]
    return -torch.log(true_class_prob)
def accuracy(y_hat, y):
    """Return the number of correct predictions as a float.

    When ``y_hat`` has more than one dimension and more than one column, it is
    first reduced to the index of the largest entry in each row.
    """
    has_class_scores = y_hat.dim() > 1 and y_hat.shape[1] > 1
    if has_class_scores:
        y_hat = torch.argmax(y_hat, dim=1)
    # Compare in the dtype of y, then count the matches.
    matches = y_hat.to(y.dtype) == y
    return float(matches.to(y.dtype).sum())
class Accumulator:
    """Accumulate running sums over ``n`` variables."""

    def __init__(self, n):
        # One float slot per tracked variable, all starting at zero.
        self.data = [0.0 for _ in range(n)]

    def add(self, *args):
        """Add each argument (converted to float) to the matching slot."""
        self.data = list(map(lambda total, value: total + float(value),
                             self.data, args))

    def reset(self):
        """Set every slot back to zero."""
        self.data = [0.0 for _ in self.data]

    def __getitem__(self, idx):
        """Return the running sum stored at ``idx``."""
        return self.data[idx]
def evaluate_accuracy(net, data_iter):
    """Return the fraction of examples in ``data_iter`` that ``net`` gets right.

    If ``net`` is an ``nn.Module`` it is switched to evaluation mode first.
    Gradients are not tracked during the pass.
    """
    if isinstance(net, torch.nn.Module):
        net.eval()
    totals = Accumulator(2)  # (correct predictions, examples seen)
    with torch.no_grad():
        for features, labels in data_iter:
            correct = accuracy(net(features), labels)
            totals.add(correct, labels.numel())
    num_correct, num_seen = totals[0], totals[1]
    return num_correct / num_seen
# ******* core training step *******
def train_epoch_ch3(net, train_iter, loss, updater):
    """Train ``net`` for one pass over ``train_iter``.

    Args:
        net: Model; switched to training mode when it is an ``nn.Module``.
        train_iter: Iterable of ``(x, y)`` batches.
        loss: Callable ``loss(y_hat, y)``.
        updater: Either a ``torch.optim.Optimizer`` or a callable that takes
            the batch size and performs the parameter update.

    Returns:
        ``(average loss per example, training accuracy)`` for this epoch.
    """
    if isinstance(net, torch.nn.Module):
        net.train()
    uses_torch_optimizer = isinstance(updater, torch.optim.Optimizer)
    totals = Accumulator(3)  # (summed loss, correct predictions, examples seen)
    for x, y in train_iter:
        y_hat = net(x)
        l = loss(y_hat, y)
        if not uses_torch_optimizer:
            # Custom updater: back-propagate the summed loss and hand the
            # batch size to the updater.
            l.sum().backward()
            updater(x.shape[0])
            totals.add(float(l.sum()), accuracy(y_hat, y), y.numel())
            continue
        # Built-in optimizer: the loss is used as a single value here, so it
        # is multiplied by the batch length to accumulate a per-example sum.
        updater.zero_grad()
        l.backward()
        updater.step()
        totals.add(float(l) * len(y), accuracy(y_hat, y), y.numel())
    loss_sum, num_correct, num_seen = totals[0], totals[1], totals[2]
    return loss_sum / num_seen, num_correct / num_seen
# ******* full training loop *******
def train_ch3(net, train_iter, test_iter, loss, num_epochs, updater):
    """Train ``net`` for ``num_epochs`` epochs and collect per-epoch metrics.

    After every training epoch the model is evaluated on ``test_iter``.

    Returns:
        Three lists with one entry per epoch: training loss, training
        accuracy and test accuracy.
    """
    train_loss = []
    train_accs = []
    test_accs = []
    for _ in range(num_epochs):
        epoch_loss, epoch_acc = train_epoch_ch3(net, train_iter, loss, updater)
        train_loss.append(epoch_loss)
        train_accs.append(epoch_acc)
        test_accs.append(evaluate_accuracy(net, test_iter))
    return train_loss, train_accs, test_accs
def predict_ch3(net, test_iter, n=6):
    """Plot the first ``n`` images of the first test batch with their labels.

    Each image is titled with its true label on the first line and the label
    predicted by ``net`` on the second.

    Args:
        net: Callable mapping a batch of images to per-class scores.
        test_iter: Iterable of ``(x, y)`` batches; only the first is used.
        n: Maximum number of images to show. Fewer are shown when the first
            batch holds fewer than ``n`` examples.
    """
    # Only the first batch is needed.
    x, y = next(iter(test_iter))
    # Clamp n so the reshape below cannot fail on a short batch.
    n = min(n, len(x))
    # Inference only: no need to record the forward pass for autograd.
    with torch.no_grad():
        predicted = net(x).argmax(axis=1)
    trues = get_fashion_mnist_labels(y)  # true labels as text
    preds = get_fashion_mnist_labels(predicted)  # predicted labels as text
    titles = [true + '\n' + pred for true, pred in zip(trues, preds)]
    show_images(x[:n].reshape((n, 28, 28)), 1, n, titles=titles[:n])