Logistic regression solves binary classification problems; its generalization is softmax regression, which handles multi-class classification.
A softmax regression network has essentially the same structure as linear regression. The differences are that the dimension of the output layer matches the number of classes, and the loss function is cross-entropy (regression problems typically use MSE loss instead).
This post also implements a dropout layer from scratch in torch.
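As a quick illustration (a minimal sketch, separate from the code below), cross-entropy on softmax probabilities is exactly what `nn.CrossEntropyLoss` computes from raw logits:

import torch
from torch import nn

logits = torch.randn(4, 10)     # raw scores: 4 samples, 10 classes
y = torch.tensor([0, 3, 9, 1])  # ground-truth class indices

# softmax turns each row of logits into a probability distribution
probs = logits.softmax(dim=1)
# cross-entropy: negative log-probability of the true class, averaged over the batch
manual = -probs[torch.arange(4), y].log().mean()

builtin = nn.CrossEntropyLoss()(logits, y)
print(torch.allclose(manual, builtin))  # True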
import torch, torchvision
from torchvision import transforms
from torch import nn
from torch.utils.data import Dataset, DataLoader
# import d2l
random_seed = 123
torch.manual_seed(random_seed)
def dropout_layer(X, dropout):
    """Inverted dropout: zero each element of X with probability `dropout`."""
    assert 0 <= dropout <= 1
    if dropout == 0:
        return X
    if dropout == 1:
        return torch.zeros_like(X)
    mask = (torch.rand_like(X) > dropout).float()
    # Scale by 1/(1-p) so the expected value passed on to the next layer is unchanged
    return mask * X / (1 - dropout)
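# Sanity check (illustrative, uncomment to run): with the 1/(1-p) scaling,
# the mean activation is preserved in expectation.
# X = torch.ones(1000, 1000)
# print(dropout_layer(X, 0.5).mean())  # ~1.0, matching X.mean()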
# softmax regression                test acc: 0.81
# multilayer perceptron             test acc: 0.82 / 0.81
# multilayer perceptron (+dropout)  test acc: 0.82
class linear_regression(nn.Module):
    """
    Model class: a multilayer perceptron classifier
    (despite the name: two 256-unit hidden layers with ReLU and dropout,
    followed by a 10-way output layer).
    """
    def __init__(self, input_dim):
        super(linear_regression, self).__init__()
        self.flatten = nn.Flatten()  # (bsz, 1, 28, 28) -> (bsz, 784)
        self.linear1 = nn.Linear(input_dim, 256)
        self.linear2 = nn.Linear(256, 256)
        self.linear3 = nn.Linear(256, 10)  # one logit per class
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(0.1)  # built-in dropout; active only in train mode

    def forward(self, X):  # X: (bsz, 1, 28, 28)
        X = self.flatten(X)
        X = self.linear1(X)
        X = self.dropout(self.relu(X))
        X = self.linear2(X)
        X = self.dropout(self.relu(X))
        output = self.linear3(X)
        return output
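# Shape check (illustrative, uncomment to run): a dummy batch of two images
# maps to two 10-way logit vectors.
# net = linear_regression(784)
# print(net(torch.randn(2, 1, 28, 28)).shape)  # torch.Size([2, 10])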
if __name__ == "__main__":
    # X = torch.tensor([[0., 1., 2., 3., 4., 5., 6., 7.], [-1., -2., -3., -4., 1., 2., 3., 4.]])
    # print(dropout_layer(X, 0.5))
    trans = transforms.ToTensor()
    mnist_train = torchvision.datasets.FashionMNIST(
        root="../data", train=True, transform=trans, download=True)
    mnist_test = torchvision.datasets.FashionMNIST(
        root="../data", train=False, transform=trans, download=True)
    bsz = 100
    trainloader = DataLoader(mnist_train, batch_size=bsz, shuffle=True)
    testloader = DataLoader(mnist_test, batch_size=bsz)
    net = linear_regression(784)
    loss = nn.CrossEntropyLoss()  # applies softmax and computes cross-entropy in one step
    lr = 0.01
    optim = torch.optim.SGD(net.parameters(), lr=lr)
    epoch = 20
    for i in range(epoch):
        net.train()  # enable dropout for training
        for X, y in trainloader:
            pred = net(X)
            l = loss(pred, y)
            optim.zero_grad()
            l.backward()
            optim.step()
        # accuracy on the last training batch of the epoch
        train_acc = (pred.argmax(dim=1) == y).sum().item() / len(y)
        print(f"epoch:{i + 1} training loss:{l.item():.4f} training acc:{train_acc}")
        net.eval()  # disable dropout for evaluation
        correct, total = 0, 0
        with torch.no_grad():
            for X, y in testloader:
                pred = net(X)
                l = loss(pred, y)
                correct += (pred.argmax(dim=1) == y).sum().item()
                total += len(y)
        print(f"epoch:{i + 1} test loss:{l.item():.4f} test acc:{correct / total}")
Reference: Dive into Deep Learning (动手学深度学习)