在使用tensorflow lstm的时候出现报错:ValueError: Trying to share variable rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, but specified shape (512, 1024) and found shape (556, 1024).
完整报错如下:
Traceback (most recent call last):
File "D:/python workspace/Chinese-novel-generation-master/beifen.py", line 288, in <module>
logits, final_state = build_nn(cell, rnn_size, input_text, vocab_size, embed_dim)
File "D:/python workspace/Chinese-novel-generation-master/beifen.py", line 222, in build_nn
outputs, final_state = build_rnn(cell, embed)
File "D:/python workspace/Chinese-novel-generation-master/beifen.py", line 209, in build_rnn
outputs, final_state = tf.nn.dynamic_rnn(cell, inputs=inputs, dtype=tf.float32)
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\rnn.py", line 598, in dynamic_rnn
dtype=dtype)
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\rnn.py", line 761, in _dynamic_rnn_loop
swap_memory=swap_memory)
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2775, in while_loop
result = context.BuildLoop(cond, body, loop_vars, shape_invariants)
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2604, in BuildLoop
pred, body, original_loop_vars, loop_vars, shape_invariants)
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\control_flow_ops.py", line 2554, in _BuildLoop
body_result = body(*packed_vars_for_body)
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\rnn.py", line 746, in _time_step
(output, new_state) = call_cell()
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\rnn.py", line 732, in <lambda>
call_cell = lambda: cell(input_t, state)
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\layers\base.py", line 450, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 938, in call
cur_inp, new_state = cell(cur_inp, cur_state)
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 774, in __call__
output, new_state = self._cell(inputs, state, scope)
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 180, in __call__
return super(RNNCell, self).__call__(inputs, state)
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\layers\base.py", line 450, in __call__
outputs = self.call(inputs, *args, **kwargs)
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 401, in call
concat = _linear([inputs, h], 4 * self._num_units, True)
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 1039, in _linear
initializer=kernel_initializer)
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 1065, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 962, in get_variable
use_resource=use_resource, custom_getter=custom_getter)
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 360, in get_variable
validate_shape=validate_shape, use_resource=use_resource)
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 1405, in wrapped_custom_getter
*args, **kwargs)
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 183, in _rnn_get_variable
variable = getter(*args, **kwargs)
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\rnn_cell_impl.py", line 183, in _rnn_get_variable
variable = getter(*args, **kwargs)
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 352, in _true_getter
use_resource=use_resource)
File "C:\Users\wenqi\AppData\Local\Programs\Python\Python35\lib\site-packages\tensorflow\python\ops\variable_scope.py", line 669, in _get_single_variable
found_var.get_shape()))
ValueError: Trying to share variable rnn/multi_rnn_cell/cell_0/basic_lstm_cell/kernel, but specified shape (512, 1024) and found shape (556, 1024).
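找到问题原因:跟变量的variable_scope(变量共享)有关。报错里的1024 = 4 × num_units,即num_units为256;第一层的输入维度是556 - 256 = 300(即embed_dim),所以它的kernel形状是(300+256, 1024) = (556, 1024);后面的层以上一层的输出(256维)作为输入,需要的kernel形状是(256+256, 1024) = (512, 1024)。两种形状却指向同一个变量cell_0/basic_lstm_cell/kernel,说明多层之间共用了同一个cell。我看了一下我定义cell时的写法: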
def get_init_cell(batch_size, rnn_size):
    """Build a stacked LSTM cell with dropout and its zero initial state.

    Args:
        batch_size: batch size passed to ``zero_state``.
        rnn_size: number of units in the LSTM cell.

    Returns:
        A ``(cell, init_state)`` pair: the ``MultiRNNCell`` and its zero
        state wrapped by ``tf.identity`` under the name ``'init_state'``.
    """
    # Number of stacked LSTM layers.
    num_layers = 3
    # Keep probability used by dropout.
    keep_prob = 0.8

    # One LSTM cell with rnn_size units, with dropout applied to its output
    # to reduce overfitting.
    base_cell = tf.contrib.rnn.BasicLSTMCell(rnn_size)
    wrapped_cell = tf.contrib.rnn.DropoutWrapper(
        base_cell, output_keep_prob=keep_prob)

    # Stack num_layers layers.
    # NOTE: the very same wrapped cell object is placed in the list
    # num_layers times, so every layer refers to one cell instance.
    cell = tf.contrib.rnn.MultiRNNCell([wrapped_cell] * num_layers)

    # Zero initial state, named via tf.identity so that it can be looked up
    # by this name later when generating text.
    init_state = tf.identity(
        cell.zero_state(batch_size, tf.float32), name='init_state')
    return cell, init_state
报错的原因是:传给MultiRNNCell的列表里重复放入了同一个drop对象,三层共用同一个cell实例(也就是同一组变量),而第一层和后面几层的输入维度不同,于是在共享变量时形状对不上。每一层都需要单独创建一个cell实例,于是我把它改成:
from tensorflow.contrib import rnn
def lstm_cell(hidden_size, keep_prob):
    """Create one LSTM cell whose output is wrapped with dropout.

    Args:
        hidden_size: number of units passed to ``rnn.LSTMCell``.
        keep_prob: output keep probability passed to ``rnn.DropoutWrapper``.

    Returns:
        A new ``rnn.DropoutWrapper`` around a new ``rnn.LSTMCell``.
    """
    # Follow the reuse flag of the variable scope that is current at call time.
    scope_reuse = tf.get_variable_scope().reuse
    base_cell = rnn.LSTMCell(hidden_size, reuse=scope_reuse)
    wrapped_cell = rnn.DropoutWrapper(base_cell, output_keep_prob=keep_prob)
    return wrapped_cell
# Create the RNN cell: build the requested number of LSTM layers (each with
# dropout applied) and the zero initial state of the stacked cell.
def get_init_cell(batch_size, rnn_size, num_layers=3, keep_prob=0.8):
    """Build a stacked LSTM cell with dropout and its zero initial state.

    Args:
        batch_size: batch size passed to ``zero_state``.
        rnn_size: number of units in each LSTM layer.
        num_layers: number of stacked LSTM layers (default 3, the value that
            was previously hard-coded).
        keep_prob: dropout output keep probability for every layer
            (default 0.8, the value that was previously hard-coded).

    Returns:
        A ``(cell, init_state)`` pair: the ``MultiRNNCell`` and its zero
        state wrapped by ``tf.identity`` under the name ``'init_state'``.
    """
    # Call lstm_cell() once per layer so that every layer owns its own cell
    # object instead of the same object being repeated in the list.
    cell = tf.contrib.rnn.MultiRNNCell(
        [lstm_cell(rnn_size, keep_prob) for _ in range(num_layers)],
        state_is_tuple=True)

    # Zero initial state.
    init_state = cell.zero_state(batch_size, tf.float32)
    # Name the state via tf.identity so that it can be looked up by this
    # name later when generating text.
    init_state = tf.identity(init_state, name='init_state')
    return cell, init_state
成功解决。