全连接层的权重最多,卷积层的权重较少(共享权重)
RNN 是一种专门用来处理带有序列模式的数据的神经网络,如天气预测、文本处理等(数据前后存在上下文依赖关系)
RNN cell 本质上是一个线性层,并且序列中的所有时间步共享同一个 RNN cell(权重共享)
$x_t$ 是输入序列(第 $t$ 个时间步的输入)
$h_t$ 是隐藏层(第 $t$ 个时间步的隐藏状态)
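重要参数:input_size(每个时间步输入的特征维度)、hidden_size(隐藏层维度)
RNNCell输入输出尺寸:输入 input 为 (batch_size, input_size),hidden 为 (batch_size, hidden_size);输出 hidden 为 (batch_size, hidden_size)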
只用一个RNNCell观察输入输出维度
import torch
if __name__ == '__main__':
    # Shape walkthrough for a single RNNCell: one time step is fed per call
    # and the hidden state is threaded through the loop by hand.
    batch_size, seq_len = 1, 3        # one sample, three days of data
    input_size, hidden_size = 4, 2    # weather features per day, hidden width

    rnn_cell = torch.nn.RNNCell(input_size=input_size, hidden_size=hidden_size)

    # Sequence-first layout: (seq_len, batch_size, input_size).
    sequence = torch.randn(seq_len, batch_size, input_size)
    # The initial hidden state is the zero vector.
    state = torch.zeros(batch_size, hidden_size)

    banner = '=' * 20
    for step, x_t in enumerate(sequence):
        print(banner, step, banner)
        print('input size:', x_t.shape)
        # The cell returns the updated hidden state, which feeds the next step.
        state = rnn_cell(x_t, state)
        print('outputs size:', state.shape)
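使用 torch.nn.RNN 时多一个参数 num_layers;调用后除了返回最后时刻的 hidden 之外,还会多返回一个 out(每个时间步的输出)
RNN输入输出尺寸:输入 input 为 (seq_len, batch_size, input_size),hidden 为 (num_layers, batch_size, hidden_size);输出 out 为 (seq_len, batch_size, hidden_size),hidden 为 (num_layers, batch_size, hidden_size)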
import torch
if __name__ == '__main__':
    # Shape walkthrough for torch.nn.RNN: the whole sequence goes in at once.
    batch_size, seq_len = 1, 3        # one sample, three days of data
    input_size, hidden_size = 4, 2    # weather features per day, hidden width
    num_layers = 1

    rnn = torch.nn.RNN(
        input_size=input_size,
        hidden_size=hidden_size,
        num_layers=num_layers,
    )
    sequence = torch.randn(seq_len, batch_size, input_size)
    # The initial hidden state is the zero vector, one slice per layer.
    h0 = torch.zeros(num_layers, batch_size, hidden_size)

    out, h_n = rnn(sequence, h0)

    report = (
        ('Output size:', out.shape),
        ('Output:', out),
        ('Hidden size', h_n.shape),
        ('Hidden:', h_n),
    )
    for caption, value in report:
        print(caption, value)
使用RNNCell把hello变成ohlol
先把字母变成one-hot编码
import torch
class Model(torch.nn.Module):
    """Thin wrapper around one ``torch.nn.RNNCell``, stepped once per call.

    The hidden state returned by :meth:`forward` is used directly as the
    class scores by the training script below, so ``hidden_size`` must equal
    the number of output classes.

    Args:
        input_size: Number of features in each time-step input.
        hidden_size: Width of the hidden state.
        batch_size: Batch size used by :meth:`init_hidden`.
    """

    def __init__(self, input_size, hidden_size, batch_size):
        super().__init__()
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnncell = torch.nn.RNNCell(input_size=self.input_size,
                                        hidden_size=self.hidden_size)

    def forward(self, input, hidden):
        """Advance the cell by one time step.

        The parameter keeps the name ``input`` so keyword calls keep working.

        Args:
            input: Tensor passed to the cell as the current step's input.
            hidden: Hidden state from the previous step.

        Returns:
            The new hidden state produced by the cell.
        """
        return self.rnncell(input, hidden)

    def init_hidden(self):
        """Return an all-zero initial hidden state of shape (batch_size, hidden_size)."""
        return torch.zeros(self.batch_size, self.hidden_size)


if __name__ == '__main__':
    batch_size = 1
    input_size = 4
    hidden_size = 4
    num_epochs = 15  # single source of truth for the loop and the log line

    idx2char = ['e', 'h', 'l', 'o']
    x_data = [1, 0, 2, 2, 3]  # indices into idx2char: "hello"
    y_data = [3, 1, 2, 3, 2]  # indices into idx2char: "ohlol"

    # Row i is the one-hot encoding of character index i.
    one_hot_lookup = [[1, 0, 0, 0],
                      [0, 1, 0, 0],
                      [0, 0, 1, 0],
                      [0, 0, 0, 1]]
    x_one_hot = [one_hot_lookup[x] for x in x_data]

    # (seq_len, batch_size, input_size) so iterating yields one step at a time.
    inputs = torch.Tensor(x_one_hot).view(-1, batch_size, input_size)
    # (seq_len, 1) so each step's label is a length-1 tensor.
    labels = torch.LongTensor(y_data).view(-1, 1)

    net = Model(input_size, hidden_size, batch_size)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=0.1)

    for epoch in range(num_epochs):
        loss = 0
        optimizer.zero_grad()
        hidden = net.init_hidden()
        print('Predicted string:', end='')
        for x_t, label in zip(inputs, labels):
            hidden = net(x_t, hidden)
            # The total loss is the sum of the per-step losses.
            loss += criterion(hidden, label)
            # Index of the largest score is the predicted character.
            _, idx = hidden.max(dim=1)
            print(idx2char[idx.item()], end='')
        loss.backward()
        optimizer.step()
        print(',Epoch[%d/%d] loss=%.4f' % (epoch + 1, num_epochs, loss.item()))
使用RNN实现
import torch
class Model(torch.nn.Module):
    """``torch.nn.RNN`` wrapper that returns one score row per (step, sample).

    The RNN output is used directly as the class scores by the training
    script below, so ``hidden_size`` must equal the number of output classes.

    Args:
        input_size: Number of features in each time-step input.
        hidden_size: Width of the hidden state.
        batch_size: Stored for backward compatibility; :meth:`forward` reads
            the batch size from its input instead.
        num_layers: Number of stacked recurrent layers.
    """

    def __init__(self, input_size, hidden_size, batch_size, num_layers=1):
        super().__init__()
        self.num_layers = num_layers
        self.batch_size = batch_size
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.rnn = torch.nn.RNN(input_size=self.input_size,
                                hidden_size=self.hidden_size,
                                num_layers=num_layers)

    def forward(self, input):
        """Run the whole sequence through the RNN from a zero hidden state.

        The parameter keeps the name ``input`` so keyword calls keep working.

        Args:
            input: Tensor laid out as (seq_len, batch, input_size).

        Returns:
            Tensor of shape (seq_len * batch, hidden_size).
        """
        # Size the initial state from the actual input so any batch size
        # works, and keep it on the same device/dtype as the input.
        hidden = torch.zeros(self.num_layers,
                             input.size(1),
                             self.hidden_size,
                             dtype=input.dtype,
                             device=input.device)
        out, _ = self.rnn(input, hidden)
        # Flatten (seq_len, batch, hidden_size) to match 1-D label vectors.
        return out.view(-1, self.hidden_size)


if __name__ == '__main__':
    batch_size = 1
    input_size = 4
    hidden_size = 4
    num_layers = 1
    seq_len = 5
    num_epochs = 15  # single source of truth for the loop and the log line

    idx2char = ['e', 'h', 'l', 'o']
    x_data = [1, 0, 2, 2, 3]  # indices into idx2char: "hello"
    y_data = [3, 1, 2, 3, 2]  # indices into idx2char: "ohlol"

    # Row i is the one-hot encoding of character index i.
    one_hot_lookup = [[1, 0, 0, 0],
                      [0, 1, 0, 0],
                      [0, 0, 1, 0],
                      [0, 0, 0, 1]]
    x_one_hot = [one_hot_lookup[x] for x in x_data]
    inputs = torch.Tensor(x_one_hot).view(seq_len, batch_size, input_size)
    labels = torch.LongTensor(y_data)

    net = Model(input_size, hidden_size, batch_size, num_layers)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=0.1)

    for epoch in range(num_epochs):
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Index of the largest score in each row is the predicted character.
        _, idx = outputs.max(dim=1)
        predicted = ''.join(idx2char[i.item()] for i in idx)
        print('Predicted :', predicted, end='')
        print(',Epoch[%d/%d] loss=%.4f' % (epoch + 1, num_epochs, loss.item()))
one-hot向量缺点
维度太大、稀疏、硬编码
常用解决办法:Embedding(把高维稀疏的 one-hot 向量映射为低维稠密、可学习的向量)
import torch
class Model(torch.nn.Module):
    """Embedding -> RNN (batch-first) -> Linear character classifier.

    All sizes are constructor arguments so the class does not depend on
    script-level globals; the defaults match the demo script below.

    Args:
        num_class: Number of output classes produced by the final linear layer.
        input_size: Number of distinct input symbols (rows of the embedding table).
        embedding_size: Width of each embedding vector.
        hidden_size: Width of the RNN hidden state.
        num_layers: Number of stacked recurrent layers.
    """

    def __init__(self, num_class=4, input_size=4, embedding_size=10,
                 hidden_size=8, num_layers=2):
        super().__init__()
        self.num_class = num_class
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.emb = torch.nn.Embedding(input_size, embedding_size)
        self.rnn = torch.nn.RNN(input_size=embedding_size,
                                hidden_size=hidden_size,
                                num_layers=num_layers,
                                batch_first=True)
        self.fc = torch.nn.Linear(hidden_size, num_class)

    def forward(self, x):
        """Score every position of every sequence in the batch.

        Args:
            x: Integer tensor of symbol indices laid out as (batch, seq_len).

        Returns:
            Tensor of shape (batch * seq_len, num_class); rows are ordered
            sample by sample (all steps of sample 0 first).
        """
        # Zero initial state sized from the actual batch, on the input's device.
        hidden = torch.zeros(self.num_layers,
                             x.size(0),
                             self.hidden_size,
                             device=x.device)
        x = self.emb(x)               # -> (batch, seq_len, embedding_size)
        x, _ = self.rnn(x, hidden)    # -> (batch, seq_len, hidden_size)
        x = self.fc(x)                # -> (batch, seq_len, num_class)
        # Flatten so the result lines up with a 1-D label vector.
        return x.view(-1, self.num_class)


if __name__ == '__main__':
    num_class = 4
    batch_size = 1
    input_size = 4
    embedding_size = 10
    hidden_size = 8
    num_layers = 2
    seq_len = 5
    num_epochs = 15  # single source of truth for the loop and the log line

    idx2char = ['e', 'h', 'l', 'o']
    x_data = [[1, 0, 2, 2, 3]]  # indices into idx2char: "hello"
    y_data = [3, 1, 2, 3, 2]    # indices into idx2char: "ohlol"
    inputs = torch.LongTensor(x_data).view(batch_size, seq_len)
    labels = torch.LongTensor(y_data)

    net = Model(num_class=num_class,
                input_size=input_size,
                embedding_size=embedding_size,
                hidden_size=hidden_size,
                num_layers=num_layers)
    criterion = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(net.parameters(), lr=0.1)

    for epoch in range(num_epochs):
        optimizer.zero_grad()
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        # Index of the largest score in each row is the predicted character.
        _, idx = outputs.max(dim=1)
        predicted = ''.join(idx2char[i.item()] for i in idx)
        print('Predicted :', predicted, end='')
        print(',Epoch[%d/%d] loss=%.4f' % (epoch + 1, num_epochs, loss.item()))