问题出现背景:博主在同一个tf.name_scope(name)下同时处理音频和文本的序列特征时,使用两次tf.nn.dynamic_rnn()函数时出现该错误:
ValueError: Variable rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel already exists, disallowed. Did you mean to set reuse=True or reuse=tf.AUTO_REUSE in VarScope?
问题出现分析:同时调用两次RNN时,第一次调用RNN没有任何问题;关键出在第二次调用RNN时。第一次tf.nn.dynamic_rnn()处理音频序列时会创建一组LSTM权重变量,而再次调用tf.nn.dynamic_rnn()处理文本序列时,系统里应该存在两个不同的lstm_cell模型,但由于tf.name_scope()不会影响tf.get_variable()创建的变量名,两次调用得到的变量名完全相同,TensorFlow无法区分,因此报kernel already exists。(此处参考https://blog.csdn.net/u013041398/article/details/74941991)
问题解决:需要用tf.variable_scope(name)为两次tf.nn.dynamic_rnn()调用分别定义不同的变量作用域,使两组LSTM权重拥有不同的变量名前缀,互不冲突。
贴上报错代码:
def dropout():
    """Build one recurrent cell (LSTM or GRU per config) with output dropout."""
    # Choose the cell class from config, then instantiate it once.
    cell_cls = (tf.contrib.rnn.BasicLSTMCell
                if self.config.rnn == 'lstm'
                else tf.contrib.rnn.GRUCell)
    base_cell = cell_cls(self.config.hidden_dim)
    return tf.contrib.rnn.DropoutWrapper(base_cell, output_keep_prob=self.keep_prob)
with tf.name_scope("rnn"):
    # --- audio branch ---
    audio_cells = [dropout() for _ in range(self.config.num_layers)]
    rnn_cell1 = tf.contrib.rnn.MultiRNNCell(audio_cells, state_is_tuple=True)
    audio_value, _ = tf.nn.dynamic_rnn(cell=rnn_cell1,
                                       inputs=self.audio_encoder_inputs,
                                       dtype=tf.float32)
    # Switch to time-major layout and keep the last time step as the encoding.
    audio_value = tf.transpose(audio_value, [1, 0, 2])
    self.audio_encoder = tf.gather(audio_value, int(audio_value.get_shape()[0]) - 1)
    self.final_audio_encoder = tf.concat([self.audio_encoder, self.encoder_prosody], axis=1)

    # --- text branch ---
    text_cells = [dropout() for _ in range(self.config.num_layers)]
    rnn_cell2 = tf.contrib.rnn.MultiRNNCell(text_cells, state_is_tuple=True)
    data = tf.nn.embedding_lookup(self.wordVectors, self.text_encoder_inputs)
    data = tf.cast(data, tf.float32)
    # NOTE: tf.name_scope does not namespace variables created via
    # tf.get_variable, so this second dynamic_rnn tries to create
    # "rnn/multi_rnn_cell/.../kernel" again and raises
    # "ValueError: Variable ... already exists".
    text_value, _ = tf.nn.dynamic_rnn(cell=rnn_cell2, inputs=data, dtype=tf.float32)
修改后的代码:
def dropout():
    """Build one recurrent cell (LSTM or GRU per config) with output dropout.

    Fix: the original used tf.variable_scope('lstmcell', reuse=True).
    reuse=True requires the scope's variables to ALREADY exist, so any
    variable first created under this scope would raise
    "ValueError: Variable lstmcell/... does not exist".
    tf.AUTO_REUSE (the option the original error message suggests) creates
    variables on first use and reuses them afterwards, which is always safe.
    (The cell constructors below build their weights lazily, so this inner
    scope is likely inert anyway — the real isolation comes from the outer
    per-branch variable scopes.)
    """
    with tf.variable_scope('lstmcell', reuse=tf.AUTO_REUSE):
        if self.config.rnn == 'lstm':
            cell = tf.contrib.rnn.BasicLSTMCell(self.config.hidden_dim)
        else:
            cell = tf.contrib.rnn.GRUCell(self.config.hidden_dim)
        return tf.contrib.rnn.DropoutWrapper(cell, output_keep_prob=self.keep_prob)
with tf.name_scope("rnn"):
    # Each encoder gets its own variable scope, so the two dynamic_rnn
    # calls create distinct kernel variables instead of colliding.
    with tf.variable_scope("audio_rnn"):
        audio_cells = [dropout() for _ in range(self.config.num_layers)]
        rnn_cell1 = tf.contrib.rnn.MultiRNNCell(audio_cells, state_is_tuple=True)
        audio_value, _ = tf.nn.dynamic_rnn(cell=rnn_cell1,
                                           inputs=self.audio_encoder_inputs,
                                           dtype=tf.float32)
        # Time-major layout; the last time step summarizes the sequence.
        audio_value = tf.transpose(audio_value, [1, 0, 2])
        self.audio_encoder = tf.gather(audio_value, int(audio_value.get_shape()[0]) - 1)
        self.final_audio_encoder = tf.concat([self.audio_encoder, self.encoder_prosody], axis=1)
        self.audio_batch_pred = tf.layers.dense(
            self.final_audio_encoder,
            (self.config.hidden_dim + self.config.AUDIO_PROSODY) // 2,
            name='final_audio_encoder')
    with tf.variable_scope("text_rnn"):
        text_cells = [dropout() for _ in range(self.config.num_layers)]
        rnn_cell2 = tf.contrib.rnn.MultiRNNCell(text_cells, state_is_tuple=True)
        data = tf.nn.embedding_lookup(self.wordVectors, self.text_encoder_inputs)
        data = tf.cast(data, tf.float32)
        text_value, _ = tf.nn.dynamic_rnn(cell=rnn_cell2, inputs=data, dtype=tf.float32)
        text_value = tf.transpose(text_value, [1, 0, 2])