1. Component implementations for the multilayer perceptron
(1) Loss function implementation
import torch
from d2l import torch as d2l

# Defined in file: ./chapter_linear-networks/linear-regression-scratch.md
def squared_loss(y_hat, y):
    """Squared loss."""
    return (y_hat - d2l.reshape(y, y_hat.shape)) ** 2 / 2

def cross_entropy(y_hat, y):
    """
    :param y_hat: predicted class probabilities, shape (batch_size, num_classes)
    :param y: ground-truth class indices, shape (batch_size,)
    :return: per-sample loss
    """
    # pick each sample's predicted probability of its true class
    return -torch.log(y_hat[range(len(y)), y])
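A quick sanity check (toy values of my own, not from the book): the fancy indexing y_hat[range(len(y)), y] picks out each sample's predicted probability for its true class.
y_hat = torch.tensor([[0.1, 0.3, 0.6], [0.3, 0.2, 0.5]])
y = torch.tensor([2, 0])
print(cross_entropy(y_hat, y))  # -log(0.6), -log(0.3) -> tensor([0.5108, 1.2040])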
# softmax model #
def softmax(x):
    x_exp = torch.exp(x)
    x_exp_sum = x_exp.sum(1, keepdim=True)  # normalize over the class dimension
    return x_exp / x_exp_sum

def net(x):
    # w, b are globals: weight (num_inputs, num_outputs) and bias (num_outputs,)
    out = softmax(torch.matmul(x.reshape(-1, w.shape[0]), w) + b)
    return out
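A quick check (toy tensor of my own) that each output row is a valid probability distribution; note that for numerical stability one would normally subtract the row max before exponentiating.
x = torch.randn(2, 5)
print(softmax(x).sum(1))  # tensor([1.0000, 1.0000])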
(2) SGD optimization
def sgd(params, lr, batch_size):
    """Minibatch stochastic gradient descent."""
    with torch.no_grad():  # update in place without tracking gradients
        for param in params:
            param -= lr * param.grad / batch_size
            param.grad.zero_()  # reset for the next backward pass
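A minimal usage sketch (toy parameter of my own): one step moves the parameter against its gradient, scaled by lr / batch_size, and clears the gradient.
p = torch.tensor([1.0, 2.0], requires_grad=True)
l = (p ** 2).sum()  # toy loss; dl/dp = 2 * p
l.backward()
sgd([p], lr=0.1, batch_size=1)
print(p)  # tensor([0.8000, 1.6000], requires_grad=True)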
(3) ReLU
# relu #
def relu(x):
    # elementwise max(x, 0)
    a = torch.zeros_like(x)
    return torch.max(x, a)
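A quick check (my own snippet) that this matches the built-in:
x = torch.tensor([-1.0, 0.0, 2.0])
print(relu(x))                              # tensor([0., 0., 2.])
print(torch.equal(relu(x), torch.relu(x)))  # True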
(4) Dropout
def dropout_layer(x, dropout):
    assert 0 <= dropout <= 1
    if dropout == 0:
        return x
    if dropout == 1:
        return torch.zeros_like(x)
    # torch.rand (uniform on [0, 1)), not torch.randn: each unit survives
    # with probability 1 - dropout
    mask = (torch.rand(x.shape) > dropout).float()
    # rescale survivors so the expected activation is unchanged
    return (mask * x) / (1 - dropout)
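A quick demonstration (my own toy tensor): survivors are scaled by 1 / (1 - dropout) so the expected activation is unchanged.
torch.manual_seed(0)
x = torch.ones(2, 8)
print(dropout_layer(x, 0.5))  # roughly half the entries zeroed, survivors scaled to 2.0
print(dropout_layer(x, 0.0))  # identity
print(dropout_layer(x, 1.0))  # all zeros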
(5) Model parameter initialization
A simple version:
import torch
from torch import nn
from torch.nn import init

class mlp_1(nn.Module):
    def __init__(self, num_inputs, num_outputs, hide_layer):
        super(mlp_1, self).__init__()
        self.layer1 = nn.Linear(num_inputs, hide_layer)
        self.layer2 = nn.Linear(hide_layer, num_outputs)
        self.init_weight()

    def forward(self, x):
        x = x.reshape(-1, 784)
        x = torch.relu(self.layer1(x))
        x = self.layer2(x)
        return x

    def init_weight(self):
        init.normal_(self.layer1.weight, mean=0, std=0.01)
        init.normal_(self.layer2.weight, mean=0, std=0.01)
        init.constant_(self.layer1.bias, 0)
        init.constant_(self.layer2.bias, 0)
Using nn.Sequential:
class mlp_net(nn.Module):
    def __init__(self, dropout1, dropout2, num_inputs, hide_layer1, hide_layer2, num_outputs):
        super(mlp_net, self).__init__()
        self.net = nn.Sequential(
            nn.Flatten(), nn.Linear(num_inputs, hide_layer1), nn.ReLU(), nn.Dropout(p=dropout1),
            nn.Linear(hide_layer1, hide_layer2), nn.ReLU(), nn.Dropout(p=dropout2),
            nn.Linear(hide_layer2, num_outputs)
        )
        self.init_weight()

    def forward(self, x):
        return self.net(x)

    def init_weight(self):
        # nn.Sequential is iterable, so we can walk its layers directly
        for m in self.net:
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, mean=0, std=0.01)
                nn.init.constant_(m.bias, 0)
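An equivalent initialization idiom (my own preference, not from the original notes) uses nn.Module.apply, which recurses through every submodule, including those nested inside nn.Sequential.
def init_normal(m):
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, mean=0, std=0.01)
        nn.init.constant_(m.bias, 0)

net = mlp_net(0.2, 0.5, 784, 256, 128, 10)
net.apply(init_normal)  # redundant here (mlp_net already calls init_weight); shown for the idiom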
2. Full code implementations
Softmax regression
import torch
from torch import nn
from torch.nn import init
from d2l import torch as d2l

# data #
batch_size = 128
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# model #
num_inputs = 784
num_outputs = 10
lr = 0.03
num_epochs = 10

class LinearNet(nn.Module):
    def __init__(self, num_inputs, num_outputs):
        super(LinearNet, self).__init__()
        self.linear = nn.Linear(num_inputs, num_outputs)
        self.init_weight()

    def forward(self, x):
        # flatten x from (batch, 1, 28, 28) into a (batch, 784) vector
        x = x.reshape(x.shape[0], -1)
        x = self.linear(x)
        return x

    def init_weight(self):
        init.normal_(self.linear.weight, mean=0, std=0.01)
        init.constant_(self.linear.bias, 0)

net = LinearNet(num_inputs, num_outputs)

# loss #
loss = torch.nn.CrossEntropyLoss()

# optimization #
optim = torch.optim.SGD(net.parameters(), lr=lr)

# train #
def train_model(net, loss, train_iter, test_iter, optim, num_epochs):
    for epoch in range(num_epochs):
        train_metric = d2l.train_epoch_ch3(net, train_iter, loss, optim)
        test_metric = d2l.evaluate_accuracy(net, test_iter)
        print(f"epoch {epoch + 1}, train_loss {float(train_metric[0])}")
        print(f"epoch {epoch + 1}, train_acc {float(train_metric[1])}")
        print(f"epoch {epoch + 1}, test_acc {float(test_metric)}")

train_model(net, loss, train_iter, test_iter, optim, num_epochs)
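Note that nn.CrossEntropyLoss expects raw logits and applies log-softmax internally, which is why LinearNet.forward returns unnormalized scores. A quick equivalence check (toy tensors of my own):
logits = torch.randn(4, 10)
y = torch.tensor([1, 0, 3, 9])
manual = -torch.log_softmax(logits, dim=1)[range(4), y].mean()
print(torch.allclose(manual, nn.CrossEntropyLoss()(logits, y)))  # True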
MLP
# one hidden layer #
import torch
from torch import nn
from torch.nn import init
from d2l import torch as d2l

# data #
batch_size = 128
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size=batch_size)
# net #
class mlp_1(nn.Module):
    def __init__(self, num_inputs, num_outputs, hide_layer):
        super(mlp_1, self).__init__()
        self.layer1 = nn.Linear(num_inputs, hide_layer)
        self.layer2 = nn.Linear(hide_layer, num_outputs)
        self.init_weight()

    def forward(self, x):
        x = x.reshape(-1, 784)
        x = torch.relu(self.layer1(x))
        x = self.layer2(x)
        return x

    def init_weight(self):
        init.normal_(self.layer1.weight, mean=0, std=0.01)
        init.normal_(self.layer2.weight, mean=0, std=0.01)
        init.constant_(self.layer1.bias, 0)
        init.constant_(self.layer2.bias, 0)
num_inputs, num_outputs, hide_layer = 784, 10, 256
# uncomment to train the nn.Module version:
# net = mlp_1(num_inputs=num_inputs, num_outputs=num_outputs, hide_layer=hide_layer)

# loss #
loss = nn.CrossEntropyLoss()

# optimization #
lr = 0.03
# optim = torch.optim.SGD(net.parameters(), lr=lr)

# train #
num_epochs = 20

def train_model(net, loss, optim, train_iter, test_iter, num_epochs):
    for epoch in range(num_epochs):
        train_metric = d2l.train_epoch_ch3(net, train_iter, loss, optim)
        test_metric = d2l.evaluate_accuracy(net, test_iter)
        print(f"epoch {epoch + 1}, train_loss {float(train_metric[0])}, train_acc {float(train_metric[1])}")
        print(f"epoch {epoch + 1}, test_acc {float(test_metric)}")

# train_model(net, loss, optim, train_iter, test_iter, num_epochs)
# from-scratch implementation #
# net parameters: scale the random weights down (std 0.01); unscaled
# standard-normal weights make the initial logits far too large #
w1 = nn.Parameter(torch.randn(num_inputs, hide_layer, requires_grad=True) * 0.01)
b1 = nn.Parameter(torch.zeros(hide_layer, requires_grad=True))
w2 = nn.Parameter(torch.randn(hide_layer, num_outputs, requires_grad=True) * 0.01)
b2 = nn.Parameter(torch.zeros(num_outputs, requires_grad=True))
params = [w1, w2, b1, b2]

# relu #
def relu(x):
    a = torch.zeros_like(x)
    return torch.max(x, a)

# net #
def net(x):
    x = x.reshape(-1, num_inputs)
    x = relu(torch.matmul(x, w1) + b1)
    x = torch.matmul(x, w2) + b2
    return x

# optim #
optim = torch.optim.SGD(params, lr=lr)
# train_model(net, loss, optim, train_iter, test_iter, num_epochs)
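Why the * 0.01 scaling above matters: with unscaled standard-normal weights, a 784-dimensional input yields pre-activations with a standard deviation near √784 ≈ 28, and training diverges. A quick sanity check of the initial output scale (my own snippet):
x = torch.randn(1, num_inputs)
print(net(x).abs().max())  # well below 1 with the 0.01 scaling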
Dropout
# from-scratch implementation #
import torch
import torch.nn as nn
from torch.nn import init
from d2l import torch as d2l

# data #
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

def dropout_layer(x, dropout):
    if dropout == 0:
        return x
    if dropout == 1:
        return torch.zeros_like(x)
    # torch.rand (uniform), not torch.randn: keep each unit with prob 1 - dropout
    mask = (torch.rand(x.shape) > dropout).float()
    # rescale survivors so the expected activation is unchanged
    return (mask * x) / (1 - dropout)

dropout = 0  # rate used by the from-scratch net below
# net: again scale the random weights down by 0.01 #
num_inputs, hide_layer1, hide_layer2, num_output = 784, 256, 128, 10
w1 = nn.Parameter(torch.randn(num_inputs, hide_layer1, requires_grad=True) * 0.01)
w2 = nn.Parameter(torch.randn(hide_layer1, hide_layer2, requires_grad=True) * 0.01)
w3 = nn.Parameter(torch.randn(hide_layer2, num_output, requires_grad=True) * 0.01)
b1 = nn.Parameter(torch.zeros(hide_layer1, requires_grad=True))
b2 = nn.Parameter(torch.zeros(hide_layer2, requires_grad=True))
b3 = nn.Parameter(torch.zeros(num_output, requires_grad=True))
params = [w1, w2, w3, b1, b2, b3]
def net(x):
    x = x.reshape(-1, num_inputs)
    x = torch.relu(x @ w1 + b1)
    x = dropout_layer(x, dropout)  # dropout after the first hidden layer
    x = torch.relu(x @ w2 + b2)
    x = dropout_layer(x, dropout)  # and after the second
    x = x @ w3 + b3
    return x
# loss: the hand-written cross_entropy from section 1 would also work here #
loss = nn.CrossEntropyLoss()

# optim #
lr = 0.03
# optim = torch.optim.SGD(params, lr=lr)
num_epochs = 10
def train_model(net, loss, train_iter, test_iter, optim, num_epochs):
    for epoch in range(num_epochs):
        train_metric = d2l.train_epoch_ch3(net, train_iter, loss, optim)
        test_metric = d2l.evaluate_accuracy(net, test_iter)
        print(f"epoch {epoch + 1}, train_loss {train_metric[0]}, train_acc {train_metric[1]}")
        print(f"epoch {epoch + 1}, test acc {test_metric}")

# train_model(net, loss, train_iter, test_iter, optim, num_epochs)
# PyTorch implementation #
class mlp_net(nn.Module):
    def __init__(self, dropout1, dropout2, num_inputs, hide_layer1, hide_layer2, num_outputs):
        super(mlp_net, self).__init__()
        self.net = nn.Sequential(
            nn.Flatten(), nn.Linear(num_inputs, hide_layer1), nn.ReLU(), nn.Dropout(p=dropout1),
            nn.Linear(hide_layer1, hide_layer2), nn.ReLU(), nn.Dropout(p=dropout2),
            nn.Linear(hide_layer2, num_outputs)
        )
        self.init_weight()

    def forward(self, x):
        return self.net(x)

    def init_weight(self):
        for m in self.net:
            if isinstance(m, nn.Linear):
                nn.init.normal_(m.weight, mean=0, std=0.01)
                nn.init.constant_(m.bias, 0)

dropout1 = 0.0
dropout2 = 0.0
net1 = mlp_net(dropout1, dropout2, num_inputs, hide_layer1, hide_layer2, num_output)
optim = torch.optim.SGD(net1.parameters(), lr=lr)
train_model(net1, loss, train_iter, test_iter, optim, num_epochs)
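One practical difference from the hand-rolled dropout_layer: nn.Dropout is switched off automatically in evaluation mode, so test-time predictions are deterministic. A quick check (my own snippet, using the net1 defined above):
x = torch.ones(1, 1, 28, 28)
net1.eval()  # disables the nn.Dropout layers
print(torch.equal(net1(x), net1(x)))  # True: forward passes are deterministic in eval mode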