Python custom loss function: NotImplementedError: Cannot convert a symbolic Tensor (truediv_2:0) to a numpy array

I am trying to write a custom loss function that takes an extra internal parameter, in order to implement the actor-critic algorithm:

def custom_loss(delta):
    def loss(y_true, y_pred):
        y_pred_clipped = K.clip(y_pred, 1e-8, 1 - 1e-8)
        log_likelihood = y_true * K.log(y_pred_clipped)
        return K.sum(-log_likelihood * delta)
    return loss

But I get the following error:

NotImplementedError: Cannot convert a symbolic Tensor (truediv_2:0) to a numpy array.

Full code:

from tensorflow.keras.layers import Dense
from tensorflow.keras import Input, Model, callbacks, models
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
tf.config.experimental_run_functions_eagerly(True)
import numpy as np
import os


class Agent(object):

    def __init__(self, alpha, beta, gamma=0.99, n_action=2, load=False,
                 input_dims=4, layer_shared=1024, layer_actor=128, layer_critic=128):
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.load = load
        self.input_dims = input_dims
        self.n_action = n_action
        self.layer_shared = layer_shared
        self.layer_actor = layer_actor
        self.layer_critic = layer_critic
        self.action_space = [i for i in range(n_action)]
        self.actor, self.critic, self.policy = self.build_actor_critic_network()

    def build_actor_critic_network(self, load=False):
        main_input = Input(shape=(self.input_dims,), name='main_input')
        delta = Input(shape=(1), name='delta')
        dense_shared = Dense(self.layer_shared, activation='relu', name='dense_shared')(main_input)
        dense_actor = Dense(self.layer_actor, activation='relu', name='dense_actor')(dense_shared)
        output_actor = Dense(self.n_action, activation='softmax', name='output_actor')(dense_actor)
        dense_critic = Dense(self.layer_critic, activation='relu', name='dense_critic')(dense_shared)
        output_critic = Dense(1, activation='linear', name='output_critic')(dense_critic)

        def custom_loss(delta):
            def loss(y_true, y_pred):
                y_pred_clipped = K.clip(y_pred, 1e-8, 1 - 1e-8)
                log_likelihood = y_true * K.log(y_pred_clipped)
                return K.sum(-log_likelihood * delta)
            return loss

        model_actor = Model(inputs=[main_input, delta], outputs=output_actor, name='model_actor')
        model_actor.compile(optimizer=Adam(lr=self.alpha), loss=custom_loss(delta))
        model_critic = Model(inputs=[main_input], outputs=output_critic, name='model_critic')
        model_critic.compile(optimizer=Adam(lr=self.beta), loss='mean_squared_error')
        model_policy = Model(inputs=[main_input], outputs=output_actor)
        model_critic.layers[1].trainable = False
        print(f'layer "{model_critic.layers[1].name}" of the "model_critic" frozen')
        return model_actor, model_critic, model_policy

    def choose_action(self, state):
        state = state[np.newaxis, :]
        probabilities = self.policy.predict(state)[0]
        action = np.random.choice(self.action_space, p=probabilities)
        return action

    def learn(self, state, action, reward, state_, done):
        state = state[np.newaxis, :]
        state_ = state_[np.newaxis, :]
        critic_value_ = self.critic.predict(state_)
        critic_value = self.critic.predict(state)
        target = reward + self.gamma * critic_value_ * (1 - int(done))
        delta = target - critic_value
        actions = np.zeros([1, self.n_action])
        actions[np.arange(1), action] = 1.0
        self.actor.fit([state, delta], actions, verbose=1)
        self.critic.fit([state], target, verbose=1)

Runner code:

import gym

env = gym.make('LunarLander-v2')
agent = Agent(alpha=0.00002, beta=0.0001, input_dims=8, n_action=4, load=False)

num_episodes = 2000
length_episode = 100
score_history = []
log = 1

for i in range(num_episodes):
    done = False
    score = 0
    observation = env.reset()
    for t in range(length_episode):
        action = agent.choose_action(observation)
        observation_, reward, done, info = env.step(action)
        agent.learn(observation, action, reward, observation_, done)
        observation = observation_
        score += reward
    score_history.append(score)
    avg_score = np.mean(score_history[-log:])
    if i % log == 0:
        print(f'episode n°{i}, score {avg_score}')

Error:

layer "dense_shared" of the "model_critic" frozen

Train on 1 samples

1/1 [==============================]

---------------------------------------------------------------------------

NotImplementedError Traceback (most recent call last)

in

16 action = agent.choose_action(observation)

17 observation_, reward, done, info = env.step(action)

---> 18 agent.learn(observation, action, reward, observation_, done)

19 observation = observation_

20 if done:

in learn(self, state, action, reward, state_, done)

82 actions = np.zeros([1, self.n_action])

83 actions[np.arange(1), action] = 1.0

---> 84 self.actor.fit([state, delta_tensor], actions, verbose=1)

85 self.critic.fit([state], target, verbose=1)

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\training.py in fit(self, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)

817 max_queue_size=max_queue_size,

818 workers=workers,

--> 819 use_multiprocessing=use_multiprocessing)

820

821 def evaluate(self,

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in fit(self, model, x, y, batch_size, epochs, verbose, callbacks, validation_split, validation_data, shuffle, class_weight, sample_weight, initial_epoch, steps_per_epoch, validation_steps, validation_freq, max_queue_size, workers, use_multiprocessing, **kwargs)

395 total_epochs=1)

396 cbks.make_logs(model, epoch_logs, eval_result, ModeKeys.TEST,

--> 397 prefix='val_')

398

399 return model.history

C:\ProgramData\Anaconda3\lib\contextlib.py in __exit__(self, type, value, traceback)

117 if type is None:

118 try:

--> 119 next(self.gen)

120 except StopIteration:

121 return False

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\engine\training_v2.py in on_epoch(self, epoch, mode)

770 # Epochs only apply to `fit`.

771 self.callbacks.on_epoch_end(epoch, epoch_logs)

--> 772 self.progbar.on_epoch_end(epoch, epoch_logs)

773

774 @tf_contextlib.contextmanager

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\callbacks.py in on_epoch_end(self, epoch, logs)

787 self.log_values.append((k, logs[k]))

788 if self.verbose:

--> 789 self.progbar.update(self.seen, self.log_values)

790

791

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\keras\utils\generic_utils.py in update(self, current, values)

557 info += ' - %s:' % k

558 if isinstance(self._values[k], list):

--> 559 avg = np.mean(self._values[k][0] / max(1, self._values[k][1]))

560 if abs(avg) > 1e-3:

561 info += ' %.4f' % avg

<__array_function__ internals> in mean(*args, **kwargs)

C:\ProgramData\Anaconda3\lib\site-packages\numpy\core\fromnumeric.py in mean(a, axis, dtype, out, keepdims)

3333

3334 return _methods._mean(a, axis=axis, dtype=dtype,

-> 3335 out=out, **kwargs)

3336

3337

C:\ProgramData\Anaconda3\lib\site-packages\numpy\core\_methods.py in _mean(a, axis, dtype, out, keepdims)

133

134 def _mean(a, axis=None, dtype=None, out=None, keepdims=False):

--> 135 arr = asanyarray(a)

136

137 is_float16_result = False

C:\ProgramData\Anaconda3\lib\site-packages\numpy\core\_asarray.py in asanyarray(a, dtype, order)

136

137 """

--> 138 return array(a, dtype, copy=False, order=order, subok=True)

139

140

C:\ProgramData\Anaconda3\lib\site-packages\tensorflow_core\python\framework\ops.py in __array__(self)

726 def __array__(self):

727 raise NotImplementedError("Cannot convert a symbolic Tensor ({}) to a numpy"

--> 728 " array.".format(self.name))

729

730 def __len__(self):

NotImplementedError: Cannot convert a symbolic Tensor (truediv_2:0) to a numpy array.

I am a bit confused by this error. I searched through several other posts, but none of them solved my problem. I know it has to do with the fact that delta should be a tensor, but I thought that since it is initialised as an Input it would be fine. I also tried converting it in various places, but that did not solve the problem either. If you know how to fix this, it would be greatly appreciated :)

Thank you!

Dummy model to test Input((1)):

from tensorflow.keras.layers import Input, Dense
from tensorflow.keras.models import Model
import numpy as np

x_train = np.random.random((1000, 1))
y_train = np.random.randint(2, size=(1000, 1))

inp = Input((1))
dense = Dense(10)(inp)
out = Dense(1, activation='sigmoid')(dense)

model = Model(inp, out)
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
model.fit(x_train, y_train, epochs=1, batch_size=100)

Versions:

import tensorflow as tf

print('tf:', tf.__version__)
print('keras:', tf.keras.__version__)

Output: tf: 2.1.0, keras: 2.2.4-tf

EDIT: Thanks mdaoust! It works :)

However, the way I initialise the delta variable may not be ideal, and I do not know how to attach the variable to the 'actor' model itself (something like self.actor.delta.assign(delta)) instead of calling self.delta.assign(delta) from the outside. Please tell me if there is a prettier way!
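One candidate for a prettier variant (only a standalone sketch with made-up toy shapes, not the code from the answer): since -sum(y_true * log(y_pred) * delta) is just categorical cross-entropy scaled by delta, the advantage can be passed per sample through fit's sample_weight argument instead of being stored in a tf.Variable:

# Standalone sketch of the sample_weight alternative.
# Toy shapes and values below are illustrative, not from the original code.
import numpy as np
from tensorflow.keras import Input, Model
from tensorflow.keras.layers import Dense

state_dim, n_action = 8, 4

inp = Input(shape=(state_dim,))
out = Dense(n_action, activation='softmax')(inp)
actor = Model(inp, out)

# categorical_crossentropy * sample_weight == -log pi(a|s) * delta,
# which is the same quantity the custom loss above computes.
actor.compile(optimizer='adam', loss='categorical_crossentropy')

state = np.random.random((1, state_dim)).astype('float32')
actions = np.zeros((1, n_action), dtype='float32')
actions[0, 2] = 1.0                       # one-hot encoding of the chosen action
delta = np.array([0.7], dtype='float32')  # advantage for this single sample

actor.fit(state, actions, sample_weight=delta, verbose=0)

With a batch size of 1 this gives the same gradient as the closure-based loss, up to the sum-versus-mean reduction.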

Working code for anyone who has the same problem:

from tensorflow.keras.layers import Dense
from tensorflow.keras import Input, Model, callbacks, models
from tensorflow.keras import backend as K
from tensorflow.keras.optimizers import Adam
import tensorflow as tf
# tf.config.experimental_run_functions_eagerly(True)
import numpy as np
import os


class Agent(object):

    def __init__(self, alpha, beta, gamma=0.99, n_action=2, load=False,
                 input_dims=4, layer_shared=1024, layer_actor=128, layer_critic=128):
        self.alpha = alpha
        self.beta = beta
        self.gamma = gamma
        self.load = load
        self.input_dims = input_dims
        self.n_action = n_action
        self.layer_shared = layer_shared
        self.layer_actor = layer_actor
        self.layer_critic = layer_critic
        self.action_space = [i for i in range(n_action)]
        self.actor, self.critic, self.policy, self.delta = self.build_actor_critic_network()

    def build_actor_critic_network(self, load=False):
        main_input = Input(shape=(self.input_dims,), name='main_input')
        # delta is a non-trainable variable captured by the loss closure;
        # it is updated in learn() before each call to actor.fit
        delta = tf.Variable([[0.]], trainable=False)
        dense_shared = Dense(self.layer_shared, activation='relu', name='dense_shared')(main_input)
        dense_actor = Dense(self.layer_actor, activation='relu', name='dense_actor')(dense_shared)
        output_actor = Dense(self.n_action, activation='softmax', name='output_actor')(dense_actor)
        dense_critic = Dense(self.layer_critic, activation='relu', name='dense_critic')(dense_shared)
        output_critic = Dense(1, activation='linear', name='output_critic')(dense_critic)

        def custom_loss(delta):
            def loss(y_true, y_pred):
                y_pred_clipped = K.clip(y_pred, 1e-8, 1 - 1e-8)
                log_likelihood = y_true * K.log(y_pred_clipped)
                return K.sum(-log_likelihood * delta)
            return loss

        model_actor = Model(inputs=[main_input], outputs=output_actor, name='model_actor')
        model_actor.compile(optimizer=Adam(lr=self.alpha), loss=custom_loss(delta))
        model_critic = Model(inputs=[main_input], outputs=output_critic, name='model_critic')
        model_critic.compile(optimizer=Adam(lr=self.beta), loss='mean_squared_error')
        model_policy = Model(inputs=[main_input], outputs=output_actor)
        model_critic.layers[1].trainable = False
        print(f'layer "{model_critic.layers[1].name}" of the "model_critic" frozen')
        return model_actor, model_critic, model_policy, delta

    def choose_action(self, state):
        state = state[np.newaxis, :]
        probabilities = self.policy.predict(state)[0]
        action = np.random.choice(self.action_space, p=probabilities)
        return action

    def learn(self, state, action, reward, state_, done):
        state = state[np.newaxis, :]
        state_ = state_[np.newaxis, :]
        critic_value_ = self.critic.predict(state_)
        critic_value = self.critic.predict(state)
        target = reward + self.gamma * critic_value_ * (1 - int(done))
        delta_numpy = target - critic_value
        actions = np.zeros([1, self.n_action])
        actions[np.arange(1), action] = 1.0
        # push the freshly computed advantage into the variable used by the actor loss
        self.delta.assign(delta_numpy)
        self.actor.fit(state, actions, verbose=1)
        self.critic.fit(state, target, verbose=1)