# Update step for the action-parameter network: maximize the Q-values the
# critic assigns to the (state, predicted-parameter) pairs.
with tf.GradientTape() as tape:
    # Forward pass: predict continuous action parameters from the states.
    # (Must happen inside the tape so gradients are recorded.)
    q_eval_parameters = self.q_parameter(states, training=True)
    # Concatenate states and predicted parameters along axis 2.
    # NOTE: tf.keras.layers.concatenate keeps the op inside the TF graph;
    # np.concatenate fails on symbolic tensors (this was the original bug).
    q_values = tf.squeeze(self.q_eval(tf.keras.layers.concatenate([states, q_eval_parameters], 2)))
    # Pick the Q-value of the action actually taken for each sample.
    # Assumes enum_actions holds (batch_index, action_index) pairs — TODO confirm.
    q_values = tf.gather_nd(params=q_values, indices=enum_actions)
    # Maximize Q w.r.t. the parameter network == minimize the negative sum.
    # tf.reduce_sum replaces the Python builtin sum(): same value, but a
    # single graph op instead of Python-level iteration over the tensor.
    loss_parameter = -tf.reduce_sum(q_values)
# Gradient computation and the optimizer step happen outside the `with`
# block, after the tape has finished recording.
grads_parameter = tape.gradient(loss_parameter, self.q_parameter.trainable_variables)
self.optimizer.apply_gradients(zip(grads_parameter, self.q_parameter.trainable_variables))
错误原因是使用了 np.concatenate() 对张量进行拼接:NumPy 函数无法处理 TensorFlow 的符号张量,
应改用 tf.keras.layers.concatenate() 在计算图内完成拼接。修改后错误解决。
修改前:q_values = tf.squeeze(self.q_eval(np.concatenate([states, q_eval_parameters], 2)))
修改后:q_values = tf.squeeze(self.q_eval(tf.keras.layers.concatenate([states, q_eval_parameters], 2)))