动手学深度学习部分(均为简化版实现)
运行前请将 d2lzh_pytorch 包放到本文件的上一级目录(代码通过 sys.path.append("..") 后 import d2lzh_pytorch 导入)
1.Softmax回归(单层多分类感知机)
import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l
# Read the data: mini-batch iterators for the training and test splits.
batch_size = 256
# Input/output sizes of the linear layer built further down.
num_inputs, num_outputs = 784, 10
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
class FlattenLayer(nn.Module):
    """Flatten every dimension after the first into a single dimension.

    Maps an input of shape ``(batch, *, *, ...)`` to ``(batch, -1)``.
    """

    def __init__(self):
        super().__init__()

    def forward(self, x):
        """Return ``x`` reshaped to ``(x.shape[0], -1)``.

        ``reshape`` is used instead of ``view`` so that non-contiguous inputs
        (for example the result of ``permute`` or ``transpose``) are flattened
        instead of raising ``RuntimeError``. Inputs that ``view`` could handle
        are still returned as a view, without copying.
        """
        return x.reshape(x.shape[0], -1)
from collections import OrderedDict
# Softmax-regression model: a named flatten step followed by one linear layer.
# NOTE: the network expects floating-point tensors as input.
net = nn.Sequential()
net.add_module('flatten', FlattenLayer())  # flattens each sample
net.add_module('linear', nn.Linear(num_inputs, num_outputs))

# Weights are drawn from a normal distribution (mean 0, std 0.01); the bias
# starts at the constant 0.
init.normal_(net.linear.weight, mean=0, std=0.01)
init.zeros_(net.linear.bias)

loss = nn.CrossEntropyLoss()
# net.parameters() yields the trainable parameters handed to the optimizer.
optimizer = torch.optim.SGD(net.parameters(), lr=0.1)

num_epochs = 10
# NOTE(review): the two positional None arguments are presumably the manual
# `params` and `lr` slots of d2l.train_ch3, unused when an optimizer is
# supplied -- confirm against the d2l helper.
d2l.train_ch3(
    net, train_iter, test_iter, loss, num_epochs, batch_size,
    None, None, optimizer,
)
2. 多层多分类感知机
import torch
from torch import nn
import d2lzh_pytorch as d2l
# Multilayer perceptron: same flatten + linear structure as before, but with a
# 256-unit hidden layer and a ReLU activation between the two linear layers.
net = nn.Sequential(
    nn.Flatten(),
    nn.Linear(784, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
)
def init_weights(m):
    """Initialise the weight of a linear layer from a normal distribution.

    Meant to be passed to ``nn.Module.apply``, which calls it once per
    submodule. The weight of every ``nn.Linear`` (or subclass) is re-drawn
    with mean 0 and std 0.01; the bias and all other module types are left
    untouched.

    Args:
        m: The module currently being visited.
    """
    # isinstance, unlike an exact type comparison, also matches subclasses of
    # nn.Linear.
    if isinstance(m, nn.Linear):
        nn.init.normal_(m.weight, std=0.01)
# Run init_weights on every submodule of the network.
net.apply(init_weights)

# Training procedure (this setup looks much like an ordinary backprop network).
batch_size = 256
lr = 0.1
num_epochs = 10

loss = nn.CrossEntropyLoss()
trainer = torch.optim.SGD(net.parameters(), lr=lr)
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

# NOTE(review): the two positional None arguments are presumably the manual
# `params` and `lr` slots of d2l.train_ch3, unused when an optimizer is
# supplied -- confirm against the d2l helper.
d2l.train_ch3(
    net, train_iter, test_iter, loss, num_epochs, batch_size,
    None, None, trainer,
)
3. 通过多项式拟合探讨欠拟合和过拟合
#通过多项式拟合来交互地探索这些概念
import torch
import numpy as np
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l
# Synthetic data set for the polynomial-fitting experiments.
n_train, n_test = 100, 100
true_w, true_b = [1.2, -3.4, 5.6], 5

features = torch.randn((n_train + n_test, 1))
# Columns are x, x^2 and x^3. dim=1 concatenates column-wise (side by side),
# giving shape (n_train + n_test, 3); dim=0 would stack rows instead.
poly_features = torch.cat((features, features ** 2, features ** 3), dim=1)

# NOTE: the quadratic and cubic terms are divided by 2 and 6, so the weights
# on poly_features that actually generate the labels are
# (true_w[0], true_w[1] / 2, true_w[2] / 6), not true_w itself.
labels = (
    true_w[0] * poly_features[:, 0]
    + true_w[1] / 2 * poly_features[:, 1]
    + true_w[2] / 6 * poly_features[:, 2]
    + true_b
)
# Add Gaussian noise (mean 0, std 0.01) in place.
labels.add_(
    torch.tensor(np.random.normal(0, 0.01, size=labels.size()),
                 dtype=torch.float)
)
# Plotting helper, defined before the training code that uses it.
def semilogy(x_vals, y_vals, x_label, y_label, x2_vals=None, y2_vals=None,
             legend=None, figsize=(3.5, 2.5)):
    """Plot one or two curves on a logarithmic y-axis.

    Args:
        x_vals: x coordinates of the first curve.
        y_vals: y coordinates of the first curve.
        x_label: Label for the x-axis.
        y_label: Label for the y-axis.
        x2_vals: Optional x coordinates of a second curve.
        y2_vals: Optional y coordinates of a second curve.
        legend: Optional legend labels; only used when a second curve is
            drawn.
        figsize: Figure size forwarded to ``d2l.set_figsize``.
    """
    d2l.set_figsize(figsize)
    d2l.plt.xlabel(x_label)
    d2l.plt.ylabel(y_label)
    d2l.plt.semilogy(x_vals, y_vals)
    # Compare against None explicitly: truth-testing a NumPy array or a
    # multi-element tensor raises instead of answering "was it supplied?".
    if x2_vals is not None and y2_vals is not None:
        d2l.plt.semilogy(x2_vals, y2_vals, linestyle=':')
        # Skip the legend when no labels were given rather than passing None.
        if legend is not None:
            d2l.plt.legend(legend)
# The training procedure is similar to the one used for softmax regression.
# Number of epochs and the mean-squared-error loss for the fitting runs.
num_epochs = 400
loss = torch.nn.MSELoss()
def fit_and_plot(train_features, test_features, train_labels, test_labels):
    """Fit a linear model with mini-batch SGD and plot train/test loss.

    A fresh ``torch.nn.Linear`` is trained for the module-level ``num_epochs``
    epochs using the module-level ``loss``. After every epoch the loss on the
    full training and test sets is recorded. At the end the final losses and
    the learned parameters are printed and both loss curves are drawn with
    ``semilogy``.

    Args:
        train_features: Training inputs; the size of the last dimension sets
            the number of input features of the model.
        test_features: Test inputs.
        train_labels: Training targets (reshaped to a column vector).
        test_labels: Test targets (reshaped to a column vector).
    """
    # The input width comes from the last dimension of the training features.
    net = torch.nn.Linear(train_features.shape[-1], 1)
    # nn.Linear initialises its own parameters, so no manual init is needed.
    batch_size = min(10, train_labels.shape[0])
    dataset = torch.utils.data.TensorDataset(train_features, train_labels)
    train_iter = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True)
    optimizer = torch.optim.SGD(net.parameters(), lr=0.01)

    # Column-vector targets for the per-epoch evaluation. They do not change
    # between epochs, so reshape once instead of inside the loop.
    train_targets = train_labels.view(-1, 1)
    test_targets = test_labels.view(-1, 1)

    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            batch_loss = loss(net(X), y.view(-1, 1))  # view acts like reshape
            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()
        # Evaluation only needs the scalar values; skip autograd bookkeeping.
        with torch.no_grad():
            train_ls.append(loss(net(train_features), train_targets).item())
            test_ls.append(loss(net(test_features), test_targets).item())
    print('final epoch: train loss', train_ls[-1], 'test loss', test_ls[-1])
    semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
             range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('weight:', net.weight.data, '\nbias:', net.bias.data)
# Cubic features (x, x^2, x^3) with the full training split.
fit_and_plot(
    poly_features[:n_train], poly_features[n_train:],
    labels[:n_train], labels[n_train:],
)

# Linear function fitted to non-linear data (underfitting).
fit_and_plot(
    features[:n_train], features[n_train:],
    labels[:n_train], labels[n_train:],
)

# Not enough training data (overfitting): only the first 10 samples are used.
fit_and_plot(
    poly_features[:10], poly_features[n_train:],
    labels[:10], labels[n_train:],
)
4. 权重衰退
import torch
import torch.nn as nn
import numpy as np
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l
# High-dimensional linear regression. Note the 200 input dimensions: there are
# fewer training samples (20) than input features.
n_train, n_test, num_inputs = 20, 100, 200
true_w = 0.01 * torch.ones(num_inputs, 1)
true_b = 0.05

# torch.randn draws samples from the standard normal distribution N(0, 1).
features = torch.randn((n_train + n_test, num_inputs))
labels = features @ true_w + true_b
# Label noise is currently disabled:
# labels += torch.tensor(np.random.normal(0, 0.01,size=labels.size()), dtype=torch.float)
print(labels[:2])

train_features = features[:n_train]
test_features = features[n_train:]
train_labels = labels[:n_train]
test_labels = labels[n_train:]

# Training and evaluation setup for the L2-penalty experiment.
batch_size, num_epochs, lr = 5, 100, 0.003
net, loss = d2l.linreg, d2l.squared_loss
dataset = torch.utils.data.TensorDataset(train_features, train_labels)
train_iter = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True)
def fit_and_plot_pytorch(wd):
    """Train a linear model with L2 weight decay and plot train/test loss.

    A fresh ``nn.Linear(num_inputs, 1)`` is initialised from a standard normal
    distribution and trained with SGD for the module-level ``num_epochs``
    epochs on the module-level ``train_iter``. The per-epoch loss on
    ``train_features`` and ``test_features`` is plotted with
    ``d2l.semilogy``, and the L2 norm of the learned weight is printed.

    Args:
        wd: Weight-decay coefficient. It is applied to the weight only; the
            bias is not decayed.
    """
    net = nn.Linear(num_inputs, 1)
    nn.init.normal_(net.weight, mean=0, std=1)
    nn.init.normal_(net.bias, mean=0, std=1)
    # One optimizer with two parameter groups: the weight group carries the
    # decay, the bias group falls back to the default (no decay).
    optimizer = torch.optim.SGD(
        [
            {'params': [net.weight], 'weight_decay': wd},
            {'params': [net.bias]},
        ],
        lr=lr,
    )
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            batch_loss = loss(net(X), y).mean()
            optimizer.zero_grad()
            batch_loss.backward()  # populates the gradients of weight and bias
            optimizer.step()
        # Evaluation only needs the scalar values; skip autograd bookkeeping.
        with torch.no_grad():
            train_ls.append(
                loss(net(train_features), train_labels).mean().item())
            test_ls.append(
                loss(net(test_features), test_labels).mean().item())
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('L2 norm of w:', net.weight.data.norm().item())
# Run the experiment without weight decay.
fit_and_plot_pytorch(wd=0)
# Variant with weight decay 10 (currently disabled):
# fit_and_plot_pytorch(10)