tf.keras.losses.MeanSquaredError()

import numpy as np
import tensorflow as tf

y_true = [[0., 1.], [2., 3.], [4., 5.]]
y_pred = [[6., 7.], [8., 9.], [10., 11.]]

mse = tf.keras.losses.MeanSquaredError()
print(mse(y_true, y_pred, sample_weight=[0.1, 0.2, 0.3]).numpy())

Using the example above, here is a brief explanation of what sample_weight does in tf.keras.losses.MeanSquaredError():

Explanation:

0-11: the values in y_true and y_pred pair up element-wise by position, so every squared error is (0 - 6)^2 = (1 - 7)^2 = ... = (5 - 11)^2 = 36.

0.1, 0.2, 0.3: the weight parameters; each weight applies to one row (one sample) of y_true/y_pred.

3: the number of entries in sample_weight, which equals the number of samples (rows).

6: len(tf.reshape(y_pred, (-1, 1))), i.e. the total number of values in y_pred (3 samples × 2 values each).

With the default reduction, the loss is each sample's mean squared error scaled by its weight, summed and divided by the number of samples: (0.1*36 + 0.2*36 + 0.3*36) / 3 = 7.2. Equivalently, scale every individual squared error by its row's weight, sum, and divide by 6: (0.1*72 + 0.2*72 + 0.3*72) / 6 = 7.2.
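As a quick sanity check, here is a minimal sketch (not part of the original example) that reproduces this value by hand; it assumes the loss object's default sum-over-batch-size reduction:

import numpy as np
import tensorflow as tf

y_true = np.array([[0., 1.], [2., 3.], [4., 5.]])
y_pred = np.array([[6., 7.], [8., 9.], [10., 11.]])
weights = np.array([0.1, 0.2, 0.3])

# Per-sample MSE: average the squared errors over each row (last axis).
per_sample = np.mean((y_true - y_pred) ** 2, axis=-1)   # [36., 36., 36.]
# Scale each sample's loss by its weight, then average over the batch.
manual = np.sum(weights * per_sample) / len(weights)     # ≈ 7.2

mse = tf.keras.losses.MeanSquaredError()
print(manual, mse(y_true, y_pred, sample_weight=weights).numpy())

Up to floating-point rounding, both values are 7.2, which is exactly the sense in which sample_weight scales each sample's contribution to the loss.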

If sample_weight is changed to [0.2, 0.1, 0.3] or [0.3, 0.2, 0.1], the result is the same as with [0.1, 0.2, 0.3]. See the example below.

import numpy as np
import tensorflow as tf


y_true = [[1., 2.], [3., 4.], [5., 6.]]
y_pred = [[7., 8.], [9., 10.], [11., 12.]]
mse = tf.keras.losses.MeanSquaredError()

print(mse(y_true, y_pred, sample_weight=[2, 4, 5]).numpy())
print(mse(y_true, y_pred, sample_weight=[2, 5, 4]).numpy())
print(mse(y_true, y_pred, sample_weight=[4, 2, 5]).numpy())
print(mse(y_true, y_pred, sample_weight=[4, 5, 2]).numpy())
print(mse(y_true, y_pred, sample_weight=[5, 2, 4]).numpy())
print(mse(y_true, y_pred, sample_weight=[5, 4, 2]).numpy())

Result: all six print statements output 132.0.

The order of the weights does not change the result here, but only because every sample has the same error (each squared difference is 6^2 = 36), so any permutation of the weights yields the same weighted sum. In general, when the per-sample errors differ, the order of sample_weight does matter: the i-th weight scales the i-th sample's loss.
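As a minimal sketch (a new toy example, not from the original post), when the per-sample errors differ, permuting sample_weight does change the loss:

import tensorflow as tf

y_true = [[0., 0.], [0., 0.]]
y_pred = [[1., 1.], [3., 3.]]   # per-sample MSE: 1 and 9

mse = tf.keras.losses.MeanSquaredError()
# Weighted loss = (w1 * 1 + w2 * 9) / 2, so swapping the weights changes it.
print(mse(y_true, y_pred, sample_weight=[1., 2.]).numpy())  # (1*1 + 2*9) / 2 = 9.5
print(mse(y_true, y_pred, sample_weight=[2., 1.]).numpy())  # (2*1 + 1*9) / 2 = 5.5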

Follow-up question: the code below builds a DQN for CartPole with tf.keras.losses.MeanSquaredError() as the intended loss, but training crashes with a ValueError.

import tensorflow as tf
import numpy as np
import gym

# Create the CartPole environment
env = gym.make('CartPole-v1')

# Define the neural network model
model = tf.keras.models.Sequential([
    tf.keras.layers.Dense(24, activation='relu', input_shape=(4,)),
    tf.keras.layers.Dense(24, activation='relu'),
    tf.keras.layers.Dense(2, activation='linear')
])

# Define the optimizer and loss function
optimizer = tf.keras.optimizers.Adam()
loss_fn = tf.keras.losses.MeanSquaredError()

# Hyperparameters
gamma = 0.99           # discount factor
epsilon = 1.0          # initial epsilon for the epsilon-greedy policy
epsilon_min = 0.01     # minimum epsilon
epsilon_decay = 0.995  # epsilon decay rate
batch_size = 32        # number of samples per batch
memory = []            # replay memory

# Action selection (epsilon-greedy)
def choose_action(state):
    if np.random.rand() < epsilon:
        return env.action_space.sample()
    else:
        Q_values = model.predict(state[np.newaxis])
        return np.argmax(Q_values[0])

# Experience replay
def replay(batch_size):
    batch = np.random.choice(len(memory), batch_size, replace=False)
    for index in batch:
        state, action, reward, next_state, done = memory[index]
        target = model.predict(state[np.newaxis])
        if done:
            target[0][action] = reward
        else:
            Q_future = np.max(model.predict(next_state[np.newaxis])[0])
            target[0][action] = reward + Q_future * gamma
        model.fit(state[np.newaxis], target, epochs=1, verbose=0)

# Train the model
for episode in range(1000):
    state = env.reset()
    done = False
    total_reward = 0
    while not done:
        action = choose_action(state)
        next_state, reward, done, _ = env.step(action)
        memory.append((state, action, reward, next_state, done))
        state = next_state
        total_reward += reward
        if len(memory) > batch_size:
            replay(batch_size)
    epsilon = max(epsilon_min, epsilon * epsilon_decay)
    print("Episode {}: Score = {}, epsilon = {:.2f}".format(episode, total_reward, epsilon))

The line next_state, reward, done, _ = env.step(action) raises:
ValueError: too many values to unpack (expected 4)
Please help fix and optimize the code.
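The traceback comes from the env.step unpacking and is characteristic of newer Gym releases (gym >= 0.26, or gymnasium), where env.reset() returns (observation, info) and env.step() returns five values: (observation, reward, terminated, truncated, info). A minimal sketch of the affected lines, assuming such a Gym version is installed:

import gym  # assumes gym >= 0.26 (the same API applies to gymnasium)

env = gym.make('CartPole-v1')

# reset() now returns (observation, info) instead of only the observation
state, _ = env.reset()

action = env.action_space.sample()
# step() now returns (obs, reward, terminated, truncated, info) instead of 4 values
next_state, reward, terminated, truncated, _ = env.step(action)
done = terminated or truncated  # recover the old single 'done' flag

Applying the same two changes inside the training loop removes the ValueError. Independently of the Gym version, the model also has to be compiled (for example model.compile(optimizer=optimizer, loss=loss_fn)) before model.fit is called inside replay().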