Understanding parameters and buffers in PyTorch

During deep-learning training you sometimes need to add extra tensors to a network, and there are two ways to do it: nn.Parameter() or register_buffer(). My understanding of the difference between a buffer and a Parameter is this: both are moved to the GPU together with model.cuda(), and both are saved in model.state_dict(). The difference is that a variable wrapped in nn.Parameter() receives gradients and is updated by optimizer.step(), whereas a tensor registered with register_buffer() has requires_grad=False by default, so it normally receives no gradient. Even if you manually set requires_grad=True so that a gradient is computed, optimizer.step() still does not update it, because buffers are not returned by model.parameters().
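A minimal sketch of this difference (the module and tensor names below are made up for illustration):

```python
import torch
from torch import nn

class Demo(nn.Module):
    def __init__(self):
        super().__init__()
        # nn.Parameter: appears in parameters(), gets a gradient, updated by the optimizer
        self.w = nn.Parameter(torch.ones(3))
        # buffer: appears in state_dict() and moves with .cuda()/.to(),
        # but is not returned by parameters(), so the optimizer never sees it
        self.register_buffer('b', torch.zeros(3))

model = Demo()
print(list(model.state_dict().keys()))           # ['w', 'b'] - both are saved
print([n for n, _ in model.named_parameters()])  # ['w'] - only the Parameter
opt = torch.optim.SGD(model.parameters(), lr=0.1)
model.w.grad = torch.ones(3)  # pretend loss.backward() has run
opt.step()                    # w is updated; b is untouched
# Even model.b.requires_grad_(True) would not change this: opt was built
# from model.parameters(), which does not include the buffer.
```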

```python
import torch
from torch import nn

class mymodel(nn.Module):
    def __init__(self):
        super(mymodel, self).__init__()
        self.linear = nn.Linear(4, 1)

    def forward(self, x):
        return self.linear(x)

model = mymodel()
print(list(model.named_parameters()))
```

```
[('linear.weight', Parameter containing:
tensor([[ 0.1173, -0.0988, -0.0900, -0.2869]], requires_grad=True)), ('linear.bias', Parameter containing:
tensor([-0.1965], requires_grad=True))]
```

Register a weight parameter named test:

```python
print('Register a weight parameter named test:')
my_test = nn.Parameter(torch.tensor([[-0.0995,  0.4867, -0.4920, -0.1703]]))
model.register_parameter('test', my_test)
print(list(model.named_parameters()))
```

```
[('test', Parameter containing:
tensor([[-0.0995,  0.4867, -0.4920, -0.1703]], requires_grad=True)), ('linear.weight', Parameter containing:
tensor([[ 0.2731,  0.3823, -0.1088, -0.3679]], requires_grad=True)), ('linear.bias', Parameter containing:
tensor([-0.4443], requires_grad=True))]
```
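For comparison, a buffer can be registered on the same model in much the same way (the name my_buffer is made up for this sketch); it shows up in state_dict() but not in named_parameters():

```python
model.register_buffer('my_buffer', torch.tensor([1., 2., 3., 4.]))
print(list(model.named_buffers()))
# [('my_buffer', tensor([1., 2., 3., 4.]))]
print('my_buffer' in model.state_dict())  # True - saved alongside the parameters
print([n for n, _ in model.named_parameters()])
# ['test', 'linear.weight', 'linear.bias'] - the buffer is not listed
```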

See https://zhuanlan.zhihu.com/p/571590961?utm_id=0

Below is a complete code example of a Transformer model implemented with PyTorch. Note the PositionalEncoding module: its precomputed encoding table is stored with register_buffer, exactly the pattern discussed above.

```python
import math

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim


class PositionalEncoding(nn.Module):
    def __init__(self, d_model, max_len=5000):
        super(PositionalEncoding, self).__init__()
        self.dropout = nn.Dropout(p=0.1)
        pe = torch.zeros(max_len, d_model)
        position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1)
        div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model))
        pe[:, 0::2] = torch.sin(position * div_term)
        pe[:, 1::2] = torch.cos(position * div_term)
        pe = pe.unsqueeze(0).transpose(0, 1)
        # The encoding table is fixed: register it as a buffer so it moves with
        # model.cuda() and is saved in state_dict(), but is never trained.
        self.register_buffer('pe', pe)

    def forward(self, x):
        x = x + self.pe[:x.size(0), :]
        return self.dropout(x)


class TransformerModel(nn.Module):
    def __init__(self, ntoken, d_model, nhead, dim_feedforward, num_layers, dropout=0.5):
        super(TransformerModel, self).__init__()
        self.model_type = 'Transformer'
        self.embedding = nn.Embedding(ntoken, d_model)
        self.pos_encoder = PositionalEncoding(d_model)
        encoder_layer = nn.TransformerEncoderLayer(d_model, nhead, dim_feedforward, dropout)
        self.transformer_encoder = nn.TransformerEncoder(encoder_layer, num_layers)
        self.fc = nn.Linear(d_model, ntoken)
        self.init_weights()

    def _generate_square_subsequent_mask(self, sz):
        # Causal mask: the transpose makes position i attend only to positions <= i.
        mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1)
        mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0))
        return mask

    def init_weights(self):
        initrange = 0.1
        self.embedding.weight.data.uniform_(-initrange, initrange)
        self.fc.bias.data.zero_()
        self.fc.weight.data.uniform_(-initrange, initrange)

    def forward(self, src):
        src = self.embedding(src)
        src = self.pos_encoder(src)
        mask = self._generate_square_subsequent_mask(src.size(0)).to(src.device)
        output = self.transformer_encoder(src, mask)
        output = self.fc(output)
        return F.log_softmax(output, dim=-1)


# Usage example:
ntoken = 1000           # vocabulary size
d_model = 512           # model dimension
nhead = 8               # number of attention heads
dim_feedforward = 2048  # hidden size of the feed-forward network
num_layers = 6          # number of Transformer encoder layers
dropout = 0.5

model = TransformerModel(ntoken, d_model, nhead, dim_feedforward, num_layers, dropout)
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Training loop
def train():
    model.train()
    total_loss = 0.
    for batch in training_data:
        optimizer.zero_grad()
        src, tgt = batch
        output = model(src)
        loss = criterion(output.view(-1, ntoken), tgt.view(-1))
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
    return total_loss / len(training_data)

# Evaluation loop
def evaluate():
    model.eval()
    total_loss = 0.
    with torch.no_grad():
        for batch in test_data:
            src, tgt = batch
            output = model(src)
            total_loss += criterion(output.view(-1, ntoken), tgt.view(-1)).item()
    return total_loss / len(test_data)
```

Note that `training_data` and `test_data` above are placeholders for the actual training and test datasets and must be replaced accordingly. The model's hyperparameters and other settings should likewise be adjusted to the requirements of the task.
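To connect this back to the buffer discussion: the 'pe' table registered inside PositionalEncoding is saved and moved with the model, yet exposes nothing for the optimizer to train. A quick check:

```python
pos = PositionalEncoding(d_model=512)
print('pe' in pos.state_dict())                # True - the table is saved
print([n for n, _ in pos.named_parameters()])  # [] - nothing here is trainable
```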