DL笔记1 权重衰退

最新推荐文章于 2022-12-13 21:13:40 发布

借我千百年丶

最新推荐文章于 2022-12-13 21:13:40 发布

阅读量206

点赞数

分类专栏：DL笔记 文章标签：pytorch 深度学习 神经网络

原文链接：https://blog.csdn.net/weixin_44575152/article/details/110789160?utm_medium=distribute.pc_relevant.none-task-blog-2%7Edefault%7EBlogCommendFromBaidu%7Edefault-9.no_search_link&depth_1-utm_source=distribute.pc_relevant.none-task-blog-2%7Edefault%7EBlogComme

版权

DL笔记专栏收录该内容

5 篇文章 0 订阅

订阅专栏

本文介绍了一个使用PyTorch实现的线性回归模型训练过程，重点关注权重参数的L2正则化（weight decay，权重衰减）。通过设置不同的权重衰减系数，作者展示了该系数如何影响模型的训练损失和测试损失，并观察了权重L2范数的变化。关键实验步骤包括参数初始化、迭代训练以及评估指标的可视化。

摘要由CSDN通过智能技术生成

def train_concise(wd):
    """Train a single linear layer with SGD, applying weight decay to the weight only.

    Relies on names defined outside this snippet: ``num_inputs``,
    ``train_iter``, ``test_iter`` and the ``d2l`` helper module.

    Args:
        wd: Weight-decay coefficient passed to the optimizer for the weight
            parameter. The bias parameter group gets no ``weight_decay`` entry.

    Prints the L2 norm of the trained weight when training finishes.
    """
    linear = nn.Linear(num_inputs, 1)
    net = nn.Sequential(linear)
    # Overwrite the default initialisation with standard-normal samples.
    for p in net.parameters():
        p.data.normal_()

    loss = nn.MSELoss()
    num_epochs = 100
    lr = 0.003

    # The bias parameter is not decayed: only the weight group sets weight_decay.
    param_groups = [
        {"params": net[0].weight, 'weight_decay': wd},
        {"params": net[0].bias},
    ]
    trainer = torch.optim.SGD(param_groups, lr=lr)

    animator = d2l.Animator(xlabel='epochs', ylabel='loss', yscale='log',
                            xlim=[5, num_epochs], legend=['train', 'test'])

    for epoch in range(1, num_epochs + 1):
        for X, y in train_iter:
            # Forward pass runs with gradients explicitly enabled.
            with torch.enable_grad():
                trainer.zero_grad()
                l = loss(net(X), y)
            l.backward()
            trainer.step()

        # Record train/test loss every fifth epoch.
        if epoch % 5 == 0:
            train_loss = d2l.evaluate_loss(net, train_iter, loss)
            test_loss = d2l.evaluate_loss(net, test_iter, loss)
            animator.add(epoch, (train_loss, test_loss))

    print('w的L2范数：', net[0].weight.norm().item())

"""
@author: Inki
@contact: inki.yinji@gmail.com
@version: Created in 2020 1206, last modified in 2020 1206.
"""

import torch
import numpy as np
import matplotlib.pyplot as plt
from torch import nn
from torch import optim
from torch.utils import data


def generate_dataset(num_tr=20, num_te=100, num_inputs=200, w=None, b=0.05):
    """
    Build a synthetic linear-regression dataset with Gaussian noise.

    Features are drawn from a standard normal distribution and labels are
    ``features . w + b`` plus noise sampled with standard deviation 0.01.

    Args:
        num_tr: Number of training samples (the first rows).
        num_te: Number of test samples (the remaining rows).
        num_inputs: Number of features per sample.
        w: Weight vector; when None, every weight is 0.01.
        b: Scalar bias added to every label.

    Returns:
        Tuple ``(train_features, test_features, train_labels, test_labels)``
        of float tensors; the label tensors have a trailing dimension of 1.
    """
    weights = np.ones(num_inputs) * 0.01 if w is None else w
    num_total = num_tr + num_te

    # Draw the features first, then the noise, so seeded runs stay reproducible.
    features = np.random.randn(num_total, num_inputs)
    labels = np.dot(features, weights) + b
    labels += np.random.normal(0, 0.01, len(labels))
    labels = labels.reshape(len(labels), 1)

    features_t = torch.tensor(features, dtype=torch.float)
    labels_t = torch.tensor(labels, dtype=torch.float)

    tr_x, te_x = features_t[:num_tr, :], features_t[num_tr:, :]
    tr_y, te_y = labels_t[:num_tr], labels_t[num_tr:]
    return tr_x, te_x, tr_y, te_y


def plot(x, y, x_label, y_label, x1=None, y1=None, legend=None):
    """
    Draw one or two curves on a log-scaled y axis and show the figure.

    Args:
        x: X values of the first curve.
        y: Y values of the first curve.
        x_label: Label for the x axis.
        y_label: Label for the y axis.
        x1: Optional x values of a second curve, drawn with a dotted line.
        y1: Optional y values of the second curve.
        legend: Optional legend labels; only applied when the second curve
            is drawn and a legend is actually supplied.
    """
    plt.semilogy(x, y)
    plt.xlabel(x_label)
    plt.ylabel(y_label)

    # Explicit None/length checks instead of truthiness: `if x1 and y1` raises
    # for multi-element NumPy arrays or tensors, whose truth value is ambiguous.
    has_second_curve = (x1 is not None and y1 is not None
                        and len(x1) > 0 and len(y1) > 0)
    if has_second_curve:
        plt.semilogy(x1, y1, linestyle=':')
        # Passing None as the label list to plt.legend fails, so skip it.
        if legend is not None:
            plt.legend(legend)
    plt.show()
    plt.close()


def fit(wd, tr_features, te_features, tr_labels, te_labels,
        batch_size=1, loss=nn.MSELoss(), num_epochs=100, lr=0.003):
    """
    Train a linear model with weight decay and plot train/test loss curves.

    A single ``nn.Linear`` layer is initialised from a standard normal
    distribution and trained with SGD. Weight decay is applied to the weight
    only; the bias is left undecayed. After every epoch the loss on the full
    training and test sets is recorded, and at the end both curves are
    plotted and the L2 norm of the weight is printed.

    Args:
        wd: Weight-decay coefficient applied to the weight parameter.
        tr_features: Training features; ``len(tr_features[0])`` is used as
            the number of model inputs.
        te_features: Test features.
        tr_labels: Training labels.
        te_labels: Test labels.
        batch_size: Mini-batch size for the shuffled training loader.
        loss: Loss callable taking ``(prediction, target)``.
        num_epochs: Number of passes over the training set.
        lr: SGD learning rate.
    """
    tr_dataset = data.TensorDataset(tr_features, tr_labels)
    tr_iter = data.DataLoader(tr_dataset, batch_size, shuffle=True)

    # Step 1. Initialize the parameters.
    net = nn.Linear(len(tr_features[0]), 1)
    nn.init.normal_(net.weight, mean=0, std=1)
    nn.init.normal_(net.bias, mean=0, std=1)

    # One optimizer with per-parameter groups: only the weight is decayed.
    optimizer = optim.SGD([
        {"params": [net.weight], "weight_decay": wd},
        {"params": [net.bias]},
    ], lr=lr)

    # Step 2. Train and test.
    tr_ls, te_ls = [], []
    for _ in range(num_epochs):
        for x, y in tr_iter:
            optimizer.zero_grad()
            loss(net(x), y).mean().backward()
            optimizer.step()

        # Evaluation needs no autograd graph. The same .mean() reduction as in
        # training keeps a loss built with reduction='none' from failing at .item().
        with torch.no_grad():
            tr_ls.append(loss(net(tr_features), tr_labels).mean().item())
            te_ls.append(loss(net(te_features), te_labels).mean().item())

    epochs = range(1, num_epochs + 1)
    plot(epochs, tr_ls, 'epochs', 'loss', epochs, te_ls, ['train', 'test'])
    print("L2 norm of w:", net.weight.data.norm().item())

if __name__ == '__main__':
    # Generate the default synthetic dataset and train with weight decay 3.
    tr_x, te_x, tr_y, te_y = generate_dataset()
    fit(wd=3, tr_features=tr_x, te_features=te_x,
        tr_labels=tr_y, te_labels=te_y)

借我千百年丶

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
DL笔记1 权重衰退

def train_concise(wd): net = nn.Sequential(nn.Linear(num_inputs, 1)) for param in net.parameters(): param.data.normal_() loss = nn.MSELoss() num_epochs, lr = 100, 0.003 # 偏置参数没有衰减。 trainer = torch.optim.SGD([ {"param
复制链接

扫一扫