TensorFlow 回归

最新推荐文章于 2022-10-25 17:41:48 发布

Lzj000lzj

最新推荐文章于 2022-10-25 17:41:48 发布

阅读量218

点赞数

分类专栏： tensorflow 文章标签：回归 tf

本文链接：https://blog.csdn.net/Lzj000lzj/article/details/94325137

版权

tensorflow 专栏收录该内容

8 篇文章 0 订阅

订阅专栏

数据加载

# Load the Auto MPG dataset into a DataFrame.
# NOTE(review): assumes `keras` (tensorflow.keras) and `pd` (pandas) have
# already been imported; the imports are not shown in this post.
dataset_path = keras.utils.get_file(
    'auto-mpg.data',
    'https://archive.ics.uci.edu/ml/machine-learning-databases/auto-mpg/auto-mpg.data')

# The raw file has no header row, so the column names are supplied explicitly.
column_names = ['MPG', 'Cylinders', 'Displacement', 'Horsepower', 'Weight',
                'Acceleration', 'Model Year', 'Origin']

# '?' marks a missing value; everything after a tab on a line is ignored
# (comment='\t'); fields are separated by runs of spaces.
raw_dataset = pd.read_csv(dataset_path, names=column_names,
                          na_values='?', comment='\t',
                          sep=' ', skipinitialspace=True)

# Work on a copy so the raw data stays untouched.
dataset = raw_dataset.copy()
dataset.tail()  # show the last few rows
输出：

MPG	Cylinders	Displacement	Horsepower	Weight	Acceleration	Model Year	Origin
393	27.0		4				140.0		86.0		2790.0		15.6	82		1
394	44.0		4				97.0		52.0		2130.0		24.6	82		2
395	32.0		4				135.0		84.0		2295.0		11.6	82		1
396	28.0		4				120.0		79.0		2625.0		18.6	82		1
397	31.0		4				119.0		82.0		2720.0		19.4	82		1

数据预处理

# Report the number of missing values per column, then discard incomplete rows.
print(dataset.isna().sum())
dataset = dataset.dropna()

# General-purpose way to one-hot encode a DataFrame column (kept for reference):
# from sklearn.preprocessing import OneHotEncoder
# onehot_origin=pd.DataFrame(OneHotEncoder().fit_transform(dataset[['Origin']]).toarray())
# print(onehot_origin.tail())

# When the categorical feature has only a few distinct values, the indicator
# columns can simply be built by hand: Origin codes 1/2/3 become the 0.0/1.0
# columns USA/Europe/Japan, and the original Origin column is removed.
origin = dataset.pop('Origin')
dataset = dataset.assign(
    USA=(origin == 1) * 1.0,
    Europe=(origin == 2) * 1.0,
    Japan=(origin == 3) * 1.0,
)
dataset.tail()

划分训练集测试集

# Split using the DataFrame's own methods rather than sklearn's
# train_test_split: sample 80% of the rows (fixed seed for reproducibility)
# as the training set; the rows that were not sampled form the test set.
train_dataset = dataset.sample(frac=0.8,random_state=0)
test_dataset = dataset.drop(train_dataset.index)

宏观观察数据分布

# Plot the pairwise joint distributions of a few training-set columns,
# with kernel density estimates on the diagonal.
sns.pairplot(
    train_dataset[["MPG", "Cylinders", "Displacement", "Weight"]],
    diag_kind="kde",
)

# Summary statistics of the training features: the label column MPG is
# removed and the table is transposed so that each feature is one row.
train_stats = train_dataset.describe().drop(columns="MPG").transpose()
train_stats
输出：
count	mean	std	min	25%	50%	75%	max等统计数据

数据准备

# Separate the label column (MPG) from the features of both splits.
train_labels = train_dataset.pop('MPG')
test_labels = test_dataset.pop('MPG')


def norm(x):
    """Standardize ``x`` with the mean and std stored in ``train_stats``.

    ``train_stats`` is the module-level statistics table; each column of
    ``x`` is shifted by the matching 'mean' entry and divided by the
    matching 'std' entry.
    """
    centered = x - train_stats['mean']
    return centered / train_stats['std']


# Both splits are scaled with the training-set statistics.
normed_train_data = norm(train_dataset)
normed_test_data = norm(test_dataset)

构建训练模型

def build_model():
    """Build and compile the regression network.

    The network has two hidden Dense layers of 64 ReLU units followed by a
    single linear output unit. The input width is the number of columns
    currently in the module-level ``train_dataset``.

    Returns:
        The compiled Keras model (loss: MSE, optimizer: RMSprop with 0.001
        as its first argument, tracked metrics: MAE and MSE).
    """
    n_features = len(train_dataset.keys())
    model = keras.Sequential([
        layers.Dense(64, activation='relu', input_shape=[n_features]),
        layers.Dense(64, activation='relu'),
        layers.Dense(1),
    ])

    model.compile(
        loss='mse',
        optimizer=tf.keras.optimizers.RMSprop(0.001),
        metrics=['mae', 'mse'],
    )
    return model

# Instantiate the network and print its layer-by-layer summary.
model = build_model()
model.summary()
class PrintDot(keras.callbacks.Callback):
    """Minimal progress indicator that prints one dot per finished epoch."""

    def on_epoch_end(self, epoch, logs=None):
        """Print a dot for this epoch, starting a new line every 100 epochs.

        Args:
            epoch: Zero-based index of the epoch that just ended.
            logs: Metric values for the epoch; unused here. Defaults to
                ``None`` to match the Keras ``Callback`` base signature.
        """
        if epoch % 100 == 0:
            print('')
        print('.', end='')
# Number of epochs requested from fit().
EPOCHS = 1000
# Train on the normalized features; validation_split=0.2 asks Keras to hold
# out 20% of the training data for validation. The returned History object
# records the per-epoch metrics.
history = model.fit(
  normed_train_data, train_labels,
  epochs=EPOCHS, validation_split = 0.2, verbose=1)
  # Alternative: add callbacks=[PrintDot()] to the call above to print one dot per epoch.

# Inspect the training log: put the recorded metrics into a DataFrame,
# add the epoch index as a column, and show the last few rows.
hist = pd.DataFrame(history.history)
hist['epoch'] = history.epoch
hist.tail()
输出形式：
loss	mae	mse	val_loss	val_mae	val_mse	epoch

# Plot the training curves.
def plot_history(history):
    """Draw training and validation error curves from a Keras History.

    Two figures are created, one for mean absolute error and one for mean
    squared error, each showing the training and validation series against
    the epoch number. The figures are then displayed with ``plt.show()``.

    Args:
        history: Object exposing ``history`` (dict of metric lists with the
            keys 'mae', 'val_mae', 'mse', 'val_mse') and ``epoch``.
    """
    hist = pd.DataFrame(history.history)
    hist['epoch'] = history.epoch

    # (metric column, y-axis label, y-axis limits) for each figure.
    panels = (
        ('mae', 'Mean Abs Error [MPG]', [0, 5]),
        ('mse', 'Mean Square Error [$MPG^2$]', [0, 20]),
    )
    for metric, y_label, y_limits in panels:
        plt.figure()
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.plot(hist['epoch'], hist[metric], label='Train Error')
        plt.plot(hist['epoch'], hist['val_' + metric], label='Val Error')
        plt.ylim(y_limits)
        plt.legend()
    plt.show()


# Show the error curves of the training run recorded in `history`.
plot_history(history)

# Evaluate on the test set; evaluate() returns the loss followed by the
# metrics the model was compiled with (mae, mse).
loss, mae, mse = model.evaluate(normed_test_data, test_labels, verbose=0)

print("Testing set Mean Abs Error: {:5.2f} MPG".format(mae))

# Scatter plot of predicted vs. true values, with the diagonal y = x drawn
# as a reference line. The plot gets its own figure.
test_predictions = model.predict(normed_test_data).flatten()
plt.figure()
plt.scatter(test_labels, test_predictions)
plt.xlabel('True Values [MPG]')
plt.ylabel('Predictions [MPG]')
plt.axis('equal')
plt.axis('square')
plt.xlim([0,plt.xlim()[1]])
plt.ylim([0,plt.ylim()[1]])
_ = plt.plot([-100, 100], [-100, 100])

# Histogram of the prediction errors. It is drawn on a new figure;
# otherwise it would be painted on top of the scatter plot and overwrite
# that plot's axis labels.
error = test_predictions - test_labels
plt.figure()
plt.hist(error, bins = 25)
plt.xlabel("Prediction Error [MPG]")
_ = plt.ylabel("Count")
plt.show()

早停法

# Build a fresh model so that training restarts from newly initialized weights.
model = build_model()

# Stop training once val_loss has failed to improve for 10 epochs (patience=10).
early_stop = keras.callbacks.EarlyStopping(monitor='val_loss', patience=10)

# Silent training (verbose=0); PrintDot prints one dot per epoch instead.
history = model.fit(normed_train_data, train_labels, epochs=EPOCHS,
                    validation_split = 0.2, verbose=0, callbacks=[early_stop, PrintDot()])

plot_history(history)