李沐-《动手学深度学习》
1.深度循环网络
import torch
from torch import nn
from d2l import torch as d2l
# Hyperparameters: mini-batch size and number of time steps per subsequence.
batch_size, num_steps = 32, 35
train_iter, vocab = d2l.load_data_time_machine(batch_size, num_steps)

# num_layers sets the number of stacked recurrent (hidden) layers.
vocab_size, num_hiddens, num_layers = len(vocab), 256, 2
num_inputs = vocab_size
device = d2l.try_gpu()

# nn.LSTM constructor arguments:
#   input_size  - number of expected features in the input `x`.
#   hidden_size - number of features in the hidden state `h`.
#   num_layers  - number of recurrent layers; num_layers=2 stacks two LSTMs,
#                 the second consuming the outputs of the first ("stacked LSTM").
#                 Default: 1.
#   bias        - if False, the layer does not use bias weights `b_ih` and `b_hh`.
lstm_layer = nn.LSTM(num_inputs, num_hiddens, num_layers)

model = d2l.RNNModel(lstm_layer, len(vocab))
model = model.to(device)

# Training.
num_epochs, lr = 500, 2
d2l.train_ch8(model, train_iter, vocab, lr, num_epochs, device)
2.编码器解码器架构
from torch import nn
#@save
class Encoder(nn.Module):
    """The base encoder interface for the encoder-decoder architecture.

    Concrete subclasses must override ``forward`` to map an input batch
    ``X`` (plus any extra positional arguments) to encoder outputs.
    """

    def __init__(self, **kwargs):
        # Fixed misspelled parameter name (`kwangs`) for consistency with
        # the sibling Decoder class; extra keyword args go to nn.Module.
        super().__init__(**kwargs)

    def forward(self, X, *args):
        # Subclasses implement the actual encoding computation.
        raise NotImplementedError
#@save
class Decoder(nn.Module):
    """The base decoder interface for the encoder-decoder architecture.

    Subclasses override ``init_state`` to turn the encoder's outputs into
    an initial decoder state, and ``forward`` to decode an input batch
    ``X`` given that state.
    """

    def __init__(self, **kwargs):
        super().__init__(**kwargs)

    def init_state(self, enc_outputs, *args):
        # Derive the initial decoder state from the encoder outputs.
        raise NotImplementedError

    def forward(self, X, state):
        # Perform one decoding pass; implemented by subclasses.
        raise NotImplementedError
#@save
class EncoderDecoder(nn.Module):
    """The base class for the encoder-decoder architecture.

    Wires a concrete encoder and decoder together: the encoder consumes
    the source input, its outputs seed the decoder's initial state, and
    the decoder then processes the target input with that state.
    """

    def __init__(self, encoder, decoder, **kwargs):
        # Fixed misspelled parameter name (`kwangs`); extra keyword
        # arguments are forwarded to nn.Module.
        super().__init__(**kwargs)
        self.encoder = encoder
        self.decoder = decoder

    def forward(self, enc_X, dec_X, *args):
        # Encoder outputs for the source batch.
        enc_outputs = self.encoder(enc_X, *args)
        # Initial decoder state derived from the encoder outputs.
        dec_state = self.decoder.init_state(enc_outputs, *args)
        return self.decoder(dec_X, dec_state)