The problem is as follows:
from keras.datasets import boston_housing
import numpy as np
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()
x_train = train_data.astype(np.float32)
from keras import models
from keras import layers
model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(13,)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1))
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
y_train = train_targets.astype(np.float32)
# y_test = test_targets.astype(np.float32)
model.fit(x_train, y_train, epochs=1, batch_size=404)
print(np.mean((y_train - model.predict(x_train).ravel()) ** 2))
# recompute the MSE by hand (ravel flattens the (N, 1) predictions to (N,))
sub_sqr = 0
for a, b in zip(y_train, model.predict(x_train).ravel()):
    sub_sqr += (a - b) ** 2
print(f"mse : {sub_sqr / len(y_train)}")
The result is as follows: the MSE loss printed by Keras and the manually computed loss are different, which is puzzling.
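Before walking through the experiments, note where the two numbers come from. During fit(), the loss Keras logs for a batch is computed on the forward pass that produces the gradients, i.e. with the weights as they were before that batch's update; the manual computation above uses the weights after the update. Below is a minimal sketch of a single full-batch training step written with a plain tf.GradientTape (an illustration assuming TensorFlow 2.x, using random stand-in data rather than the Boston housing set) that makes the difference explicit.

# A minimal sketch of one full-batch training step (assumes TensorFlow 2.x;
# random stand-in data, not the real dataset).
import numpy as np
import tensorflow as tf

x = np.random.rand(404, 13).astype(np.float32)   # stand-in for x_train
y = np.random.rand(404, 1).astype(np.float32)    # stand-in for y_train

model = tf.keras.Sequential([
    tf.keras.layers.Dense(64, activation='relu', input_shape=(13,)),
    tf.keras.layers.Dense(64, activation='relu'),
    tf.keras.layers.Dense(1),
])
optimizer = tf.keras.optimizers.RMSprop()
loss_fn = tf.keras.losses.MeanSquaredError()

with tf.GradientTape() as tape:
    pred = model(x, training=True)
    loss = loss_fn(y, pred)   # this is the kind of value fit() logs for the step
grads = tape.gradient(loss, model.trainable_variables)
optimizer.apply_gradients(zip(grads, model.trainable_variables))

# The loss computed inside the step used the weights *before* the update;
# recomputing it now uses the weights *after* the update, so it is typically smaller.
print("loss logged during the step:      ", float(loss))
print("loss recomputed after the update: ", float(loss_fn(y, model(x, training=False))))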
Solution:
epochs=1:
from keras.datasets import boston_housing
import numpy as np
##################### set random seed. ########################
# Seed value
# Apparently you may use different seed values at each stage
seed_value = 0
# 1. Set the `PYTHONHASHSEED` environment variable at a fixed value
import os
os.environ['PYTHONHASHSEED'] = str(seed_value)
# 2. Set the `python` built-in pseudo-random generator at a fixed value
import random
random.seed(seed_value)
# 3. Set the `numpy` pseudo-random generator at a fixed value (numpy is already imported)
np.random.seed(seed_value)
# 4. Set the `tensorflow` pseudo-random generator at a fixed value
import tensorflow as tf
tf.random.set_seed(seed_value)
# for TF1-style code, use instead:
# tf.compat.v1.set_random_seed(seed_value)
##################### set random seed. ########################
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()
x_train = train_data.astype(np.float32)
from keras import models
from keras import layers
model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(13,)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1))
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
y_train = train_targets.astype(np.float32)
# get predictions before training
y_pred1 = model.predict(x_train)
model.fit(x_train, y_train, epochs=1, batch_size=404)
print(np.mean((y_train - model.predict(x_train).ravel()) ** 2))
#model.trainable=True
# get predictions after training (predict's default batch_size is 32)
y_pred2 = model.predict(x_train)
# get predictions after training, explicit batch_size=404
y_pred3 = model.predict(x_train, batch_size=404)
# get predictions after training, explicit batch_size=100
y_pred4 = model.predict(x_train, batch_size=100)
print(np.mean((y_train - y_pred1.ravel()) ** 2))  ## prediction before training: equals the loss Keras printed
print(np.mean((y_train - y_pred2.ravel()) ** 2))  ## prediction after 1 training epoch; compare with the epochs=2 run below
print(np.mean((y_train - y_pred3.ravel()) ** 2))  ## prediction after 1 epoch (batch_size=404); compare with the epochs=2 run below
print(np.mean((y_train - y_pred4.ravel()) ** 2))  ## prediction after 1 epoch (batch_size=100); compare with the epochs=2 run below
epochs=2:
from keras.datasets import boston_housing
import numpy as np
##################### set random seed. ########################
# Seed value
# Apparently you may use different seed values at each stage
seed_value = 0
# 1. Set the `PYTHONHASHSEED` environment variable at a fixed value
import os
os.environ['PYTHONHASHSEED'] = str(seed_value)
# 2. Set the `python` built-in pseudo-random generator at a fixed value
import random
random.seed(seed_value)
# 3. Set the `numpy` pseudo-random generator at a fixed value (numpy is already imported)
np.random.seed(seed_value)
# 4. Set the `tensorflow` pseudo-random generator at a fixed value
import tensorflow as tf
tf.random.set_seed(seed_value)
# for TF1-style code, use instead:
# tf.compat.v1.set_random_seed(seed_value)
##################### set random seed. ########################
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()
x_train = train_data.astype(np.float32)
from keras import models
from keras import layers
model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(13,)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1))
model.compile(optimizer='rmsprop', loss='mse', metrics=['mae'])
y_train = train_targets.astype(np.float32)
# get predictions before training
y_pred1 = model.predict(x_train)
model.fit(x_train, y_train, epochs=2, batch_size=404)
print(np.mean((y_train - model.predict(x_train).ravel()) ** 2))
#model.trainable=True
# get predictions after training (predict's default batch_size is 32)
y_pred2 = model.predict(x_train)
# get predictions after training, explicit batch_size=404
y_pred3 = model.predict(x_train, batch_size=404)
# get predictions after training, explicit batch_size=100
y_pred4 = model.predict(x_train, batch_size=100)
print(np.mean((y_train - y_pred1.ravel()) ** 2))
print(np.mean((y_train - y_pred2.ravel()) ** 2))
print(np.mean((y_train - y_pred3.ravel()) ** 2))
print(np.mean((y_train - y_pred4.ravel()) ** 2))
The output is as follows.
epochs=1000:
from keras.datasets import boston_housing
import numpy as np
(train_data, train_targets), (test_data, test_targets) = boston_housing.load_data()
x_train = train_data.astype(np.float32)
from keras import models
from keras import layers
model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(13,)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1))
model.compile(optimizer='rmsprop', loss='mse')
y_train = train_targets.astype(np.float32)
print(y_train.shape)
# y_test = test_targets.astype(np.float32)
model.fit(x_train, y_train, epochs=1000, batch_size=404)
print(np.mean((y_train - model.predict(x_train).ravel()) ** 2))
# recompute the MSE by hand (ravel flattens the (N, 1) predictions to (N,))
sub_sqr = 0
for a, b in zip(y_train, model.predict(x_train).ravel()):
    sub_sqr += (a - b) ** 2
print(f"mse : {sub_sqr / len(y_train)}")
The result is as follows: this time the MSE computed by Keras and the manually computed MSE agree. This is expected: after 1000 epochs the loss barely changes from one epoch to the next, so the one-epoch lag in the value Keras reports no longer makes a visible difference.
Summary: the loss Keras prints actually lags one epoch behind what we expect, because the loss reported for the first epoch is really the loss of the model before that epoch's weight update (with full-batch training, the loss of the untrained model). This is worth keeping in mind.
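The history object returned by fit() makes this easy to check directly. Below is a minimal sketch (assuming tf.keras; batch_size is set to the full training-set size so each epoch contains exactly one weight update):

# Verifies the summary: with full-batch training, the loss fit() logs for
# epoch 1 matches the MSE of the untrained model. (A sketch assuming tf.keras.)
import numpy as np
from keras.datasets import boston_housing
from keras import models, layers

(train_data, train_targets), _ = boston_housing.load_data()
x_train = train_data.astype(np.float32)
y_train = train_targets.astype(np.float32)

model = models.Sequential()
model.add(layers.Dense(64, activation='relu', input_shape=(13,)))
model.add(layers.Dense(64, activation='relu'))
model.add(layers.Dense(1))
model.compile(optimizer='rmsprop', loss='mse')

# MSE of the randomly initialized model, before any weight update
mse_before = np.mean((y_train - model.predict(x_train).ravel()) ** 2)

history = model.fit(x_train, y_train, epochs=1,
                    batch_size=len(x_train), verbose=0)

print("pre-training MSE:       ", mse_before)
print("loss logged for epoch 1:", history.history['loss'][0])  # should match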