keras 的MSE函数和自己实现的MSE函数不一致问题

该博客探讨了在 Keras 中训练模型时,自定义计算损失函数与内置损失函数之间的差异。作者通过设置随机种子确保可重复性,并在不同训练阶段对比了预测输出,揭示了 Keras 的损失输出实际上是在当前 epoch 结束后的状态。文章强调了理解模型训练过程中的损失计算时机的重要性。
摘要由CSDN通过智能技术生成

问题如下

# Reproduce the mismatch: Keras' reported training loss vs. an MSE computed
# manually on the same data after model.fit.
from keras.datasets import boston_housing
import numpy as np

(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()


x_train = train_data.astype(np.float32)

from keras import models
from keras import layers

# Simple MLP regressor over the 13 Boston-housing features.
model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(13,)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1))
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])

y_train = train_targets.astype(np.float32)
# y_test = test_targets.astype(np.float32)

model.fit(x_train, y_train, epochs=1, batch_size=404)

# Predict once and reuse the result: model.predict returns shape (n, 1),
# so flatten with ravel() before comparing against the (n,) target vector.
y_pred = model.predict(x_train).ravel()

# Vectorized MSE.
print(np.mean((y_train - y_pred) ** 2))

# Equivalent element-wise loop. Using the flattened predictions keeps the
# accumulator a scalar (the original zipped against (n, 1) predictions, so
# it printed a 1-element array instead of a number).
sub_sqr = 0.0
for a, b in zip(y_train, y_pred):
    sub_sqr += (a - b) ** 2

# Label fixed: this is the MSE, not "rme".
print(f"mse : {sub_sqr / len(y_train)}")

结果如下
(结果截图)可以看到,keras 输出的 MSE loss 和自己计算输出的 loss 是不一样的,比较费解。

解决方法

epoch=1

# Controlled experiment (epochs=1): fix all random seeds, capture predictions
# before and after training, and compare each against Keras' reported loss.
from keras.datasets import boston_housing
import numpy as np


##################### set random seed. ########################
# Seed value
# Apparently you may use different seed values at each stage
seed_value = 0
# 1. Set the `PYTHONHASHSEED` environment variable at a fixed value
import os
os.environ['PYTHONHASHSEED'] = str(seed_value)
# 2. Set the `python` built-in pseudo-random generator at a fixed value
import random
random.seed(seed_value)
# 3. Set the `numpy` pseudo-random generator at a fixed value
# (numpy was already imported above; no need to import it twice)
np.random.seed(seed_value)
# 4. Set the `tensorflow` pseudo-random generator at a fixed value
import tensorflow as tf
tf.random.set_seed(seed_value)
# for later versions:
# tf.compat.v1.set_random_seed(seed_value)
##################### set random seed. ########################

(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()


x_train = train_data.astype(np.float32)

from keras import models
from keras import layers

model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(13,)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1))
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
y_train = train_targets.astype(np.float32)

# Predictions BEFORE any training.
y_pred1 = model.predict(x_train)
model.fit(x_train, y_train, epochs=1, batch_size=404)
print(np.mean((y_train - model.predict(x_train).ravel()) ** 2))

# Predictions AFTER training, under several predict batch sizes
# (predict's default batch_size is 32; it should not change the values).
y_pred2 = model.predict(x_train)
y_pred3 = model.predict(x_train, batch_size=404)
y_pred4 = model.predict(x_train, batch_size=100)

print(np.mean((y_train - y_pred1.ravel()) ** 2))  # pre-training prediction: matches Keras' reported loss
print(np.mean((y_train - y_pred2.ravel()) ** 2))  # after 1 epoch; compare with the epochs=2 run below
print(np.mean((y_train - y_pred3.ravel()) ** 2))  # after 1 epoch; compare with the epochs=2 run below
print(np.mean((y_train - y_pred4.ravel()) ** 2))  # after 1 epoch; compare with the epochs=2 run below

在这里插入图片描述

epoch=2

# Same controlled experiment with epochs=2: because the seeds match the
# epochs=1 run, the loss Keras reports for epoch 2 can be compared directly
# with the post-epoch-1 predictions computed above.
from keras.datasets import boston_housing
import numpy as np

##################### set random seed. ########################
# Seed value
# Apparently you may use different seed values at each stage
seed_value = 0
# 1. Set the `PYTHONHASHSEED` environment variable at a fixed value
import os
os.environ['PYTHONHASHSEED'] = str(seed_value)
# 2. Set the `python` built-in pseudo-random generator at a fixed value
import random
random.seed(seed_value)
# 3. Set the `numpy` pseudo-random generator at a fixed value
# (numpy was already imported above; no need to import it twice)
np.random.seed(seed_value)
# 4. Set the `tensorflow` pseudo-random generator at a fixed value
import tensorflow as tf
tf.random.set_seed(seed_value)
# for later versions:
# tf.compat.v1.set_random_seed(seed_value)
##################### set random seed. ########################


(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()


x_train = train_data.astype(np.float32)

from keras import models
from keras import layers

model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(13,)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1))
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
y_train = train_targets.astype(np.float32)

# Predictions BEFORE any training.
y_pred1 = model.predict(x_train)
model.fit(x_train, y_train, epochs=2, batch_size=404)
print(np.mean((y_train - model.predict(x_train).ravel()) ** 2))

# Predictions AFTER training, under several predict batch sizes
# (predict's default batch_size is 32; it should not change the values).
y_pred2 = model.predict(x_train)
y_pred3 = model.predict(x_train, batch_size=404)
y_pred4 = model.predict(x_train, batch_size=100)

print(np.mean((y_train - y_pred1.ravel()) ** 2))  # pre-training prediction
print(np.mean((y_train - y_pred2.ravel()) ** 2))  # after 2 epochs
print(np.mean((y_train - y_pred3.ravel()) ** 2))  # after 2 epochs
print(np.mean((y_train - y_pred4.ravel()) ** 2))  # after 2 epochs

输出结果如下
在这里插入图片描述

epoch=1000

# Long-run check (epochs=1000): once training converges, the weights barely
# change between epochs, so Keras' reported loss and the manually computed
# post-fit MSE should agree.
from keras.datasets import boston_housing
import numpy as np

(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()


x_train = train_data.astype(np.float32)

from keras import models
from keras import layers

model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(13,)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1))
model.compile(optimizer='rmsprop', loss='mse')

y_train = train_targets.astype(np.float32)
print(y_train.shape)
# y_test = test_targets.astype(np.float32)

model.fit(x_train, y_train, epochs=1000, batch_size=404)

# Predict once and reuse; ravel() flattens the (n, 1) predictions to (n,).
y_pred = model.predict(x_train).ravel()

# Vectorized MSE.
print(np.mean((y_train - y_pred) ** 2))

# Equivalent element-wise loop with a scalar accumulator (the original
# zipped against (n, 1) predictions and printed a 1-element array).
sub_sqr = 0.0
for a, b in zip(y_train, y_pred):
    sub_sqr += (a - b) ** 2

# Label fixed: this is the MSE, not "rme".
print(f"mse : {sub_sqr / len(y_train)}")

结果如下
(结果截图)可以看到,最终的结果显示 keras 的 mse 计算结果和自己手动计算的方式又一致了——训练充分收敛后,相邻 epoch 之间权重几乎不再变化,两种算法自然得到同一数值。

总结如下

keras 在 fit 过程中打印的 loss 是在每个 batch 的前向传播时、也就是该步权重更新之前计算的,因此它对应的是“更新前”的权重状态,比我们预想的要晚一个 epoch:第一个 epoch 输出的 loss 实际上就是未训练权重下的 loss。这也是为什么训练结束后再用 model.predict 手动计算的 MSE 与最后一个 epoch 打印的 loss 不一致,这一点值得注意。

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值