基于nn.Cell 自定义了一个LSTM网络,代码如下:
import math
import numpy as orinp
import mindspore.numpy as np
from mindspore import nn,ops
from mindspore import Tensor,Parameter
from models import w2vutils
import common
import mindspore
class ScaleL2Loss(nn.LossBase):
    """Scaled L2 loss that amplifies the error on negative samples.

    The amplification factor is 1 when ``base`` equals 1 and grows linearly
    up to ``max_len`` as ``base`` approaches 0, so negatively-sampled data
    is weighted more heavily in the squared error.
    """

    def __init__(self, max_len=30):
        super(ScaleL2Loss, self).__init__("mean")
        self.abs = ops.Abs()
        self.square = ops.Square()
        self.max_len = max_len

    def construct(self, base, target):
        # Amplification factor: |base - 1| * (max_len - 1) + 1.
        factor = self.abs(base - 1) * (self.max_len - 1) + 1
        weighted_diff = factor * (base - target)
        return self.get_loss(self.square(weighted_diff) / self.max_len)
class LSTMScorer(nn.Cell):
    """Bidirectional-LSTM sequence scorer.

    Embeds a token-id sequence, runs it through a bidirectional LSTM and
    projects the flattened per-step hidden states to one score per position.

    Args:
        vocab_size: size of the embedding vocabulary.
        embedding_dim: dimension of each embedding vector.
        embedding_table: initial embedding weight table.
        lstm_kernel_size: LSTM hidden size. Default: 128.
        seq_len: (maximum) sequence length. Default: 30.
        device_type: selects the LSTM implementation; when it equals
            ``common._device_choices[2]`` an ``nn.LSTMCell`` is used,
            otherwise ``nn.LSTM``.
    """

    def __init__(self, vocab_size, embedding_dim, embedding_table,
                 lstm_kernel_size=128, seq_len=30,
                 device_type: str = common._device_choices[2]):
        super(LSTMScorer, self).__init__()
        self.embedding = nn.Embedding(vocab_size, embedding_dim,
                                      embedding_table=embedding_table,
                                      padding_idx=0)
        if device_type == common._device_choices[2]:
            self.lstm = nn.LSTMCell(input_size=embedding_dim,
                                    hidden_size=lstm_kernel_size,
                                    has_bias=True, batch_first=True,
                                    dropout=0.2, bidirectional=True)
        else:
            self.lstm = nn.LSTM(input_size=embedding_dim,
                                hidden_size=lstm_kernel_size,
                                has_bias=True, batch_first=True,
                                dropout=0.2, bidirectional=True)
        # Keep the hidden size so construct() does not hard-code 128.
        self.lstm_kernel_size = lstm_kernel_size
        self.flatten = nn.Flatten()
        # A bidirectional LSTM emits 2*hidden_size features per step, so the
        # flattened feature size is seq_len * 2 * lstm_kernel_size.  (The
        # original used embedding_dim * seq_len, which only matches when
        # embedding_dim == 2 * lstm_kernel_size.)
        self.dense = nn.Dense(2 * lstm_kernel_size * seq_len, seq_len)
        stdv = 1 / math.sqrt(lstm_kernel_size)
        lstm_weight_size = int(
            (embedding_dim + lstm_kernel_size) * 2 * lstm_kernel_size * 4
            + 2 * lstm_kernel_size * 4)
        self.lstm.weight = Parameter(
            orinp.random.uniform(-stdv, stdv,
                                 (lstm_weight_size, 1, 1)).astype(orinp.float32),
            name="LSTMPARAMS")

    def construct(self, x):
        """Score a batch of token-id sequences.

        Accepts a 1-D tensor (a single sequence, promoted to batch size 1)
        or a 2-D (batch, seq_len) tensor; returns (batch, seq_len) scores.
        """
        batch_size = 1
        if len(x.shape) == 1:
            x = x.reshape((1, x.shape[0]))
        else:
            batch_size = x.shape[0]
        # Build the initial states with mindspore.numpy so this also works in
        # GRAPH_MODE, where constructing a raw Tensor inside construct() is
        # rejected.  Zero-init matches the reference rnns.py implementation.
        h = np.zeros((2, batch_size, self.lstm_kernel_size), mindspore.float32)
        c = np.zeros((2, batch_size, self.lstm_kernel_size), mindspore.float32)
        x = self.embedding(x)
        x, h, c, _, _ = self.lstm(x, h, c, self.lstm.weight)
        x = self.flatten(x)
        return self.dense(x)
在GRAPH_MODE 下出错,原因是GRAPH_MODE 模式下不能生成Tensor类型的数据。
那么,h和c要在哪里创建呢。因为batch_size的问题,一轮迭代最后一批数据很难保证和预设的batchsize一致。所以h,c 动态创建是不可避免的。应该怎么做呢。
解答:
@constexpr
def _init_state(shape, dtype, is_lstm):
    """Create zero-filled initial RNN state tensor(s) at graph-compile time.

    The @constexpr decorator evaluates this function during graph compilation,
    so the Tensor construction is legal even in GRAPH_MODE (where building a
    Tensor at runtime inside construct() is not allowed).  The original
    snippet fused the decorator and the def onto one line
    (``@constexprdef ...``), which is a syntax error.

    Args:
        shape: state shape, e.g. (num_directions * num_layers, batch, hidden).
        dtype: mindspore dtype of the states.
        is_lstm: when True return the (h, c) pair; otherwise return h only.
    """
    hx = Tensor(np.zeros(shape), dtype)
    cx = Tensor(np.zeros(shape), dtype)
    if is_lstm:
        return (hx, cx)
    return hx
参考实现:https://gitee.com/mindspore/mindspore/blob/master/mindspore/nn/layer/rnns.py#L361
所有的h,c初始化都是0