import os
import numpy as np
import random
import gensim
def get_data(file, num=None):
    """Read a UTF-8 text file and return its lines.

    The raw contents are split on "\n"; when ``num`` is given, only the
    first ``num`` lines are returned.
    """
    with open(file, "r", encoding="utf-8") as fh:
        lines = fh.read().split("\n")
    if num is not None:
        lines = lines[:num]
    return lines
def train_word2vec(file):
    """Return a word2vec model, loading the cached one when it exists.

    Each line of the corpus file is handed to gensim as one "sentence";
    a plain string iterates character by character, so this trains a
    character-level skip-gram model (sg=1, negative sampling since hs=0)
    with 128-dim vectors. The model is cached in "word2vec.vec".
    """
    if os.path.exists("word2vec.vec"):
        return gensim.models.Word2Vec.load("word2vec.vec")
    with open(file, encoding="utf-8") as fh:
        sentences = fh.read().split("\n")
    model = gensim.models.Word2Vec(
        sentences,
        vector_size=128,
        window=10,
        min_count=1,
        sg=1,
        hs=0,
        workers=5,
    )
    model.save("word2vec.vec")
    return model
def make_x_y(word2vec, poetry):
    """Build one (input, target) training pair from a poem string.

    Inputs are the embeddings of every character except the last,
    shape (T, emb_dim); targets are one-hot rows for every character
    except the first, shape (T, vocab_size) — i.e. each step predicts
    the next character.
    """
    inputs, targets = poetry[:-1], poetry[1:]
    x_emb = np.array([word2vec.wv[ch] for ch in inputs])
    vocab = word2vec.wv.key_to_index
    y_onehot = np.zeros((len(targets), len(vocab)))
    for row, ch in enumerate(targets):
        y_onehot[row, vocab[ch]] = 1
    return x_emb, y_onehot
def softmax(x):
    """Row-wise softmax of a 2-D array.

    Subtracts each row's max before exponentiating so that large logits
    do not overflow ``np.exp`` (the shift cancels out mathematically and
    leaves the result unchanged).
    """
    shifted = x - np.max(x, axis=1, keepdims=True)
    ex = np.exp(shifted)
    return ex / np.sum(ex, axis=1, keepdims=True)
if __name__ == "__main__":
    # Hand-written character-level RNN (forward pass + BPTT in numpy),
    # trained on 5-character Chinese poems with word2vec embeddings.
    data_file = os.path.join("..", "data", "古诗生成", "poetry_5.txt")
    all_poetry = get_data(data_file, 200)
    train_word2vec(data_file)
    word2vec = gensim.models.Word2Vec.load("word2vec.vec")
    corpus_num = len(word2vec.wv.index_to_key)  # vocabulary size

    batch_size = 20  # NOTE(review): defined but never used below
    epoch = 100
    lr = 0.0004
    embedding_num = 128  # must match vector_size in train_word2vec
    hidden_num = 50
    max_grad = 2  # gradient-clipping threshold

    # RNN parameters: h_t = tanh(x_t @ W + h_{t-1} @ U + biases),
    # logits = h_t @ V + bias_V (V is the output projection).
    W = np.random.normal(0, 2 / np.sqrt(embedding_num), size=(embedding_num, hidden_num))
    U = np.random.normal(0, 1 / np.sqrt(hidden_num), size=(hidden_num, hidden_num))
    V = np.random.normal(0, 1 / np.sqrt(hidden_num), size=(hidden_num, corpus_num))
    # Each bias matches the output width of its matrix.
    bias_W = np.zeros((1, W.shape[1]))
    bias_U = np.zeros((1, U.shape[1]))
    bias_V = np.zeros((1, V.shape[1]))

    for e in range(epoch):
        for pi, poetry in enumerate(all_poetry):
            x_embs, labels = make_x_y(word2vec, poetry)
            a_prev = np.zeros((1, hidden_num))  # initial hidden state
            caches = []
            sent_loss = 0
            # ---- forward pass over the sequence ----
            for x, y in zip(x_embs, labels):
                x = x[None]  # (emb,) -> (1, emb)
                h = x @ W + bias_W
                h2 = a_prev @ U + bias_U
                h_ = h + h2            # pre-activation
                h__ = np.tanh(h_)      # hidden state
                pre = h__ @ V + bias_V
                pro = softmax(pre)
                sent_loss += -np.sum(y * np.log(pro))  # cross-entropy
                caches.append((x, y, pro, h__, h_, a_prev))
                a_prev = h__
            # ---- backward pass (BPTT), newest step first ----
            dh_next = 0  # gradient arriving from the following time step
            DW = DU = DV = 0
            DBW = DBU = DBV = 0
            for x, y, pro, h__, h_, a_prev in caches[::-1]:
                G = pro - y  # d(loss)/d(logits) for softmax + cross-entropy
                DV += h__.T @ G
                delta_h__ = G @ V.T + dh_next
                # tanh'(z) = 1 - tanh(z)^2 = 1 - h__^2.  The original used
                # the pre-activation (1 - h_**2), which is the wrong
                # derivative.
                delta_h_ = delta_h__ * (1 - h__ ** 2)
                DU += a_prev.T @ delta_h_
                DW += x.T @ delta_h_
                dh_next = delta_h_ @ U.T
                DBV += np.sum(G, axis=0, keepdims=True)
                DBU += np.sum(delta_h_, axis=0, keepdims=True)
                DBW += np.sum(delta_h_, axis=0, keepdims=True)
            # Clip every weight gradient; the original clipped only DW,
            # leaving the recurrent matrix U free to explode.
            DW = np.clip(DW, -max_grad, max_grad)
            DU = np.clip(DU, -max_grad, max_grad)
            DV = np.clip(DV, -max_grad, max_grad)
            W -= lr * DW
            U -= lr * DU
            V -= lr * DV
            bias_W -= lr * DBW
            bias_U -= lr * DBU
            bias_V -= lr * DBV
            if pi % 100 == 0:
                print(sent_loss)
# Hand-written RNN demo (trailing blog-page metadata removed).