import torch
import torch.nn as nn
import numpy as np


class TorchRNN(nn.Module):
    def __init__(self, input_size, hidden_size):
        super(TorchRNN, self).__init__()
        # bias=False, so the layer computes h_t = tanh(x_t @ W_ih.T + h_{t-1} @ W_hh.T),
        # exactly the recurrence that MyRNN below reproduces by hand
        self.layer = nn.RNN(input_size, hidden_size, bias=False, batch_first=True)

    def forward(self, x):
        return self.layer(x)
# a single 4-step sequence; each step is a 4-dim input vector
x = np.array([[1, 2, 3, 4],
              [3, 4, 5, 6],
              [5, 6, 7, 8],
              [7, 8, 9, 10]])
hidden_size = 5
torch_model = TorchRNN(4, hidden_size)

# pull the randomly initialized weights out of the torch layer so the DIY model
# can reuse them; convert to numpy for the hand-written forward pass
w_ih = torch_model.state_dict()["layer.weight_ih_l0"].numpy()
w_hh = torch_model.state_dict()["layer.weight_hh_l0"].numpy()
class MyRNN:
    """Hand-written equivalent of nn.RNN: h_t = tanh(x_t @ W_ih.T + h_{t-1} @ W_hh.T)."""
    def __init__(self, w_ih, w_hh, hidden_size):
        self.w_ih = w_ih
        self.w_hh = w_hh
        self.hidden_size = hidden_size

    def forward(self, x):
        ht = np.zeros(self.hidden_size)      # initial hidden state is all zeros
        output = []
        for xt in x:
            ux = np.dot(xt, self.w_ih.T)     # input projection
            wh = np.dot(ht, self.w_hh.T)     # recurrent projection
            ht_next = np.tanh(ux + wh)       # new hidden state
            output.append(ht_next)
            ht = ht_next
        return np.array(output), ht          # all time steps, plus the final hidden state
torch_x = torch.FloatTensor([x])             # add the batch dimension: (1, 4, 4)
output, h = torch_model(torch_x)
print(output.detach().numpy(), "torch model output")
print(h.detach().numpy(), "torch model final hidden state")
print("---------------")

diy_model = MyRNN(w_ih, w_hh, hidden_size)
output, h = diy_model.forward(x)
print(output, "DIY model output")
print(h, "DIY model final hidden state")
==========================
[[[ 0.9770124  -0.98210144 -0.898459    0.43363687 -0.7096077 ]
  [ 0.99852514 -0.9999082  -0.97600037  0.87200433 -0.49491423]
  [ 0.999937   -0.9999985  -0.9864487   0.9611373  -0.592286  ]
  [ 0.99999815 -1.         -0.99189234  0.98961574 -0.72276914]]] torch model output
[[[ 0.99999815 -1.         -0.99189234  0.98961574 -0.72276914]]] torch model final hidden state
---------------
[[ 0.97701239 -0.98210147 -0.89845902  0.43363689 -0.70960771]
 [ 0.99852516 -0.9999082  -0.97600034  0.87200431 -0.49491426]
 [ 0.99993697 -0.9999985  -0.98644868  0.96113729 -0.59228603]
 [ 0.99999817 -0.99999997 -0.99189236  0.98961571 -0.72276908]] DIY model output
[ 0.99999817 -0.99999997 -0.99189236  0.98961571 -0.72276908] DIY model final hidden state
import torch
import torch.nn as nn
import numpy as np
import random
import json
import matplotlib.pyplot as plt
"""
基于pytorth的网络编写
实现一个网络完成一个简单nlp任务
判断文本中是否有某些特定字符出现
"""
class TorchModel(nn.Module):
    def __init__(self, vector_dim, sentence_length, vocab, hidden_dim, output_dim):
        super(TorchModel, self).__init__()
        self.embedding = nn.Embedding(len(vocab), vector_dim)  # char index -> vector
        self.rnn = nn.RNN(vector_dim, hidden_dim, batch_first=True)
        self.classify = nn.Linear(hidden_dim, output_dim)      # position logits
        self.loss = nn.CrossEntropyLoss()

    def forward(self, x, y=None):
        x = self.embedding(x)
        x, _ = self.rnn(x)
        x = x[:, -1, :]                  # keep only the last time step's hidden state
        y_pred = self.classify(x)
        if y is not None:
            return self.loss(y_pred, y)  # training mode: return the loss
        else:
            return y_pred                # inference mode: return logits
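# Shape walk-through (a sketch assuming the default dims set in main() below):
#   x            (batch, sentence_length)              e.g. (2, 5) int indices
#   embedding -> (batch, sentence_length, vector_dim)  e.g. (2, 5, 20)
#   rnn       -> (batch, sentence_length, hidden_dim)  e.g. (2, 5, 10)
#   x[:, -1,:]-> (batch, hidden_dim)                   e.g. (2, 10)
#   classify  -> (batch, output_dim)                   e.g. (2, 5) position logits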
def build_vocab():
    chars = "abcdefghij"
    vocab = {"pad": 0}
    for index, char in enumerate(chars):
        vocab[char] = index + 1    # 'a'->1, 'b'->2, ...
    vocab['unk'] = len(vocab)      # 11
    return vocab
def build_sample(vocab, sentence_length):
    # draw sentence_length-1 distinct non-'a' tokens (note: 'pad' and 'unk'
    # may be drawn as fillers), insert one 'a', and shuffle;
    # the label is the position of 'a'
    remaining_keys = list(vocab.keys())
    remaining_keys.remove('a')
    x = random.sample(remaining_keys, sentence_length - 1)
    x.append('a')
    random.shuffle(x)
    y = x.index('a')
    x = [vocab[word] for word in x]
    return x, y
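# A quick illustration (hedged; the actual draw depends on the RNG state):
#   vocab = build_vocab()
#   build_sample(vocab, 5)  ->  e.g. ([4, 1, 7, 2, 10], 1)
#   i.e. the encoding of "dagbj", with 'a' at index 1.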
def build_dataset(sample_length, vocab, sentence_length):
    dataset_x = []
    dataset_y = []
    for i in range(sample_length):
        x, y = build_sample(vocab, sentence_length)
        dataset_x.append(x)
        dataset_y.append(y)
    return torch.LongTensor(dataset_x), torch.LongTensor(dataset_y)
def build_model(vector_dim, sentence_length, vocab, hidden_dim, output_dim):
    model = TorchModel(vector_dim, sentence_length, vocab, hidden_dim, output_dim)
    return model
def evaluate(model, vocab, sentence_length):
    model.eval()
    test_sample_num = 200
    x, y = build_dataset(test_sample_num, vocab, sentence_length)
    correct = 0
    with torch.no_grad():
        y_pred = model(x)
        _, predicted_labels = torch.max(y_pred, 1)
        correct += (predicted_labels == y).sum().item()
    print(f'Accuracy on this test set: {correct / test_sample_num}')
    return correct / test_sample_num
def main():
    epoch_num = 20
    batch_size = 2
    train_sample = 50
    char_dim = 20
    sentence_length = 5
    learning_rate = 0.01
    hidden_dim = 10
    output_dim = 5             # one class per possible position of 'a'
    vocab = build_vocab()
    model = build_model(char_dim, sentence_length, vocab, hidden_dim, output_dim)
    optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
    log = []
    x, y = build_dataset(train_sample, vocab, sentence_length)
    for epoch in range(epoch_num):
        model.train()
        watch_loss = []
        for batch_index in range(train_sample // batch_size):
            x_train = x[batch_index * batch_size:(batch_index + 1) * batch_size]
            y_train = y[batch_index * batch_size:(batch_index + 1) * batch_size]
            optimizer.zero_grad()
            loss = model(x_train, y_train)
            loss.backward()
            optimizer.step()
            watch_loss.append(loss.item())
        print(f'=========== Epoch {epoch + 1} results, mean loss: {np.mean(watch_loss)} ===========')
        acc = evaluate(model, vocab, sentence_length)
        log.append([acc, float(np.mean(watch_loss))])
    torch.save(model.state_dict(), 'nlpmodel.pth')
    writer = open("vocab.json", "w", encoding="utf8")
    writer.write(json.dumps(vocab, ensure_ascii=False, indent=2))
    writer.close()
    plt.plot(range(1, epoch_num + 1), [i[0] for i in log], label='acc')
    plt.plot(range(1, epoch_num + 1), [i[1] for i in log], label='loss')
    plt.legend()
    plt.show()
    return
def predict(model_path, vocab_path, input_strings):
    char_dim = 20
    sentence_length = 5
    hidden_dim = 10
    output_dim = 5
    vocab = json.load(open(vocab_path, 'r', encoding='utf-8'))
    model = build_model(char_dim, sentence_length, vocab, hidden_dim, output_dim)
    model.load_state_dict(torch.load(model_path))
    x = []
    for i in input_strings:
        x.append([vocab[j] for j in i])   # encode each string char by char
    model.eval()
    with torch.no_grad():
        result = model(torch.LongTensor(x))
        _, predicted_positions = torch.max(result, dim=1)
    for i, pred_pos in enumerate(predicted_positions):
        print(f'Input: {input_strings[i]}, predicted position: {pred_pos.item()}')
if __name__ == '__main__':
    main()
    test_strings = ["abcde", "bacdf", "aebdc"]
    predict("nlpmodel.pth", "vocab.json", test_strings)