A Summary of Dive into Deep Learning (with PyTorch)

This post walks through simplified PyTorch implementations from the Dive into Deep Learning tutorials: softmax regression (a single-layer multi-class model), a multilayer perceptron, polynomial fitting to illustrate underfitting and overfitting, and a hands-on look at weight decay. Through these examples, readers can understand basic neural-network principles and common strategies for dealing with typical problems.


Before running the code, move the d2lzh_pytorch helper package to a directory on the import path (the scripts below append the parent directory to sys.path).

1. Softmax Regression (a single-layer multi-class model)

import torch
from torch import nn
from torch.nn import init
import numpy as np
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l

# load the Fashion-MNIST data
batch_size = 256
train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)

num_inputs = 784
num_outputs = 10
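
Each Fashion-MNIST image is 28 × 28 = 784 grayscale pixels and belongs to one of 10 clothing categories, which fixes the input and output dimensions above.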

class FlattenLayer(nn.Module):
    def __init__(self):
        super(FlattenLayer, self).__init__()
    def forward(self, x): # x shape: (batch, *, *, ...)
        return x.view(x.shape[0], -1)

from collections import OrderedDict
net = nn.Sequential(
    OrderedDict([
        ('flatten', FlattenLayer()),  # flatten each image into a row vector
        ('linear', nn.Linear(num_inputs, num_outputs))
    ]))
# the input to net should be a float tensor
init.normal_(net.linear.weight, mean=0, std=0.01)  # initialize the weights from a normal distribution
init.constant_(net.linear.bias, val=0)  # initialize the bias to the constant 0

loss = nn.CrossEntropyLoss()

optimizer = torch.optim.SGD(net.parameters(), lr=0.1)  # net.parameters() yields all learnable parameters of net

num_epochs = 10
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs,
              batch_size, None, None, optimizer)
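
The network outputs raw logits: nn.CrossEntropyLoss applies a log-softmax followed by the negative log-likelihood loss internally. A minimal sketch of that equivalence (the tensor values below are made up for illustration):

import torch
import torch.nn.functional as F

logits = torch.tensor([[2.0, 0.5, -1.0]])  # raw scores for one sample, 3 classes
target = torch.tensor([0])                 # index of the true class

loss_builtin = F.cross_entropy(logits, target)  # log-softmax + NLL in one call
log_probs = F.log_softmax(logits, dim=1)        # manual route
loss_manual = -log_probs[0, target[0]]          # -log p(true class)
print(loss_builtin.item(), loss_manual.item())  # the two values match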

2. Multilayer Perceptron (a multi-class MLP)

import torch
from torch import nn
import d2lzh_pytorch as d2l

# same as softmax regression, but with a hidden layer and a ReLU() activation in between
net = nn.Sequential(nn.Flatten(), nn.Linear(784, 256), nn.ReLU(), nn.Linear(256, 10))

def init_weights(m):
    if type(m)==nn.Linear:
        nn.init.normal_(m.weight,std=0.01)
net.apply(init_weights)

# training
batch_size, lr, num_epochs = 256, 0.1, 10  # apart from the hidden layer, this is an ordinary feed-forward (BP) network
loss = nn.CrossEntropyLoss()
trainer = torch.optim.SGD(net.parameters(), lr=lr)

train_iter, test_iter = d2l.load_data_fashion_mnist(batch_size)
d2l.train_ch3(net, train_iter, test_iter, loss, num_epochs, batch_size, None, None, trainer)
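
d2l.train_ch3 comes from the book's helper package. Roughly, it runs a loop like the following (a simplified sketch of the training procedure, not the package's exact code):

def train_sketch(net, train_iter, test_iter, loss, num_epochs, trainer):
    for epoch in range(num_epochs):
        total_loss, n = 0.0, 0
        for X, y in train_iter:
            l = loss(net(X), y)
            trainer.zero_grad()
            l.backward()
            trainer.step()
            total_loss += l.item() * y.shape[0]
            n += y.shape[0]
        # test accuracy: fraction of correctly classified test images
        correct, total = 0, 0
        with torch.no_grad():
            for X, y in test_iter:
                correct += (net(X).argmax(dim=1) == y).sum().item()
                total += y.shape[0]
        print(f'epoch {epoch + 1}, loss {total_loss / n:.4f}, test acc {correct / total:.3f}')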

3. Underfitting and Overfitting via Polynomial Fitting

# explore these concepts interactively through polynomial fitting
import torch
import numpy as np
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l

n_train, n_test, true_w, true_b = 100, 100, [1.2, -3.4, 5.6], 5
features = torch.randn((n_train + n_test, 1))
poly_features = torch.cat((features, torch.pow(features, 2), torch.pow(features, 3)), 1)  # torch.cat concatenates tensors; dim=1 joins columns side by side, dim=0 stacks rows
labels = (true_w[0] * poly_features[:, 0] + true_w[1] / 2 * poly_features[:, 1] + true_w[2] / 6 * poly_features[:, 2] + true_b)
labels += torch.tensor(np.random.normal(0, 0.01, size=labels.size()), dtype=torch.float)
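
Written out, the generated labels follow y = 1.2·x − 3.4·x²/2 + 5.6·x³/6 + 5 + ε, with noise ε ~ N(0, 0.01²); dividing the higher-order terms by 2 = 2! and 6 = 3! keeps them on a scale comparable to the linear term.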

# first define a plotting helper (log-scale y axis)
def semilogy(x_vals, y_vals, x_label, y_label, x2_vals=None, y2_vals=None,
             legend=None, figsize=(3.5, 2.5)):
    d2l.set_figsize(figsize)
    d2l.plt.xlabel(x_label)
    d2l.plt.ylabel(y_label)
    d2l.plt.semilogy(x_vals, y_vals)
    if x2_vals and y2_vals:
        d2l.plt.semilogy(x2_vals, y2_vals, linestyle=':')
        d2l.plt.legend(legend)

# the training procedure is similar to softmax regression
num_epochs, loss = 400, torch.nn.MSELoss()

def fit_and_plot(train_features, test_features, train_labels, test_labels):
    net = torch.nn.Linear(train_features.shape[-1], 1)  # the first argument is the input dimension, i.e. the number of feature columns
    # per the nn.Linear docs, PyTorch initializes the parameters itself, so no manual initialization is needed here
    batch_size = min(10, train_labels.shape[0])
    dataset = torch.utils.data.TensorDataset(train_features, train_labels)  # wrap the tensors in a dataset
    train_iter = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True)

    optimizer = torch.optim.SGD(net.parameters(), lr=0.01)
    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            l = loss(net(X), y.view(-1, 1))  # view is similar to reshape
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
        train_labels = train_labels.view(-1, 1)
        test_labels = test_labels.view(-1, 1)
        train_ls.append(loss(net(train_features), train_labels).item())
        test_ls.append(loss(net(test_features), test_labels).item())
    print('final epoch: train loss', train_ls[-1], 'test loss', test_ls[-1])
    semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
             range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('weight:', net.weight.data, '\nbias:', net.bias.data)

# normal fit: third-order polynomial features match the data-generating function
fit_and_plot(poly_features[:n_train, :], poly_features[n_train:, :], labels[:n_train], labels[n_train:])
# underfitting: a linear function fit to nonlinear data
fit_and_plot(features[:n_train, :], features[n_train:, :], labels[:n_train], labels[n_train:])
# overfitting: too little training data (only 10 samples)
fit_and_plot(poly_features[0:10, :], poly_features[n_train:, :], labels[0:10], labels[n_train:])
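
Reading the semilog plots: in the well-specified fit both losses drop to the noise level and the learned weights come close to true_w and true_b; in the underfitting case both losses stay high; in the overfitting case the training loss keeps falling while the test loss remains large.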

4. Weight Decay

import torch
import torch.nn as nn
import numpy as np
import sys
sys.path.append("..")
import d2lzh_pytorch as d2l

n_train, n_test, num_inputs = 20, 100, 200  # note: 200 input dimensions, more than the 20 training samples
true_w, true_b = torch.ones(num_inputs, 1) * 0.01, 0.05
features = torch.randn((n_train + n_test, num_inputs))  # torch.randn() draws from the standard normal distribution N(0, 1)
labels = torch.matmul(features, true_w) + true_b
#labels += torch.tensor(np.random.normal(0, 0.01,size=labels.size()), dtype=torch.float)
print(labels[:2, :])
train_features, test_features = features[:n_train, :], features[n_train:, :]
train_labels, test_labels = labels[:n_train], labels[n_train:]

# define training and testing with an L2 penalty applied through the optimizer
batch_size, num_epochs, lr = 5, 100, 0.003
net, loss = d2l.linreg, d2l.squared_loss
dataset = torch.utils.data.TensorDataset(train_features, train_labels)
train_iter = torch.utils.data.DataLoader(dataset, batch_size, shuffle=True)
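
With weight decay wd, SGD adds wd · w to the gradient of the weights, so each update becomes w ← w − lr · (∂l/∂w + wd · w) = (1 − lr · wd) · w − lr · ∂l/∂w: every step first shrinks w by the factor (1 − lr · wd), hence the name "decay". This is equivalent to adding an L2 penalty (wd / 2) · ||w||² to the loss.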

def fit_and_plot_pytorch(wd):
    # decay only the weight parameters; parameter names typically end in "weight"
    net = nn.Linear(num_inputs, 1)
    nn.init.normal_(net.weight, mean=0, std=1)
    nn.init.normal_(net.bias, mean=0, std=1)
    # use two optimizers so that w and b are updated with different rules
    optimizer_w = torch.optim.SGD(params=[net.weight], lr=lr, weight_decay=wd)  # apply weight decay to the weights
    optimizer_b = torch.optim.SGD(params=[net.bias], lr=lr)  # do not decay the bias

    train_ls, test_ls = [], []
    for _ in range(num_epochs):
        for X, y in train_iter:
            l = loss(net(X), y).mean()
            optimizer_w.zero_grad()
            optimizer_b.zero_grad()

            l.backward()  # compute the gradients dw and db

            # call step on each optimizer separately to update the weights and the bias
            optimizer_w.step()  # in effect just subtracts lr * (gradient + wd * w)
            optimizer_b.step()
        train_ls.append(loss(net(train_features), train_labels).mean().item())
        test_ls.append(loss(net(test_features), test_labels).mean().item())
    d2l.semilogy(range(1, num_epochs + 1), train_ls, 'epochs', 'loss',
                 range(1, num_epochs + 1), test_ls, ['train', 'test'])
    print('L2 norm of w:', net.weight.data.norm().item())

fit_and_plot_pytorch(0)  # wd=0: no weight decay; with 200 features and only 20 samples the model overfits

# fit_and_plot_pytorch(10)  # wd=10: strong weight decay sharply reduces the L2 norm of w
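
The two-optimizer split above is one way to exempt the bias from decay. An equivalent and more common pattern (a sketch using PyTorch's per-parameter-group options, not code from the book) is a single optimizer with two parameter groups:

def fit_with_param_groups(wd):
    net = nn.Linear(num_inputs, 1)
    nn.init.normal_(net.weight, mean=0, std=1)
    nn.init.normal_(net.bias, mean=0, std=1)
    # one optimizer, two groups: weight decay applies to the weights only
    optimizer = torch.optim.SGD([
        {'params': [net.weight], 'weight_decay': wd},
        {'params': [net.bias]}  # no weight_decay for the bias
    ], lr=lr)
    for _ in range(num_epochs):
        for X, y in train_iter:
            l = loss(net(X), y).mean()
            optimizer.zero_grad()
            l.backward()
            optimizer.step()
    print('L2 norm of w:', net.weight.data.norm().item())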