california_housing:数据集,实现简单的回归模型。
数据集:from sklearn.datasets import fetch_california_housing
import matplotlib as mpl
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import sklearn
import pandas as pd
import tensorflow as tf
from tensorflow import keras
数据集载入
from sklearn.datasets import fetch_california_housing
housing = fetch_california_housing()
# print(housing.DESCR)
# print(housing.data.shape)
# print(housing.target.shape)
查看数据
import pprint
# pprint 打印,对format有优化
pprint.pprint(housing.data[0:5])
pprint.pprint(housing.target[0:5])
array([[ 8.32520000e+00, 4.10000000e+01, 6.98412698e+00,
1.02380952e+00, 3.22000000e+02, 2.55555556e+00,
3.78800000e+01, -1.22230000e+02],
[ 8.30140000e+00, 2.10000000e+01, 6.23813708e+00,
9.71880492e-01, 2.40100000e+03, 2.10984183e+00,
3.78600000e+01, -1.22220000e+02],
[ 7.25740000e+00, 5.20000000e+01, 8.28813559e+00,
1.07344633e+00, 4.96000000e+02, 2.80225989e+00,
3.78500000e+01, -1.22240000e+02],
[ 5.64310000e+00, 5.20000000e+01, 5.81735160e+00,
1.07305936e+00, 5.58000000e+02, 2.54794521e+00,
3.78500000e+01, -1.22250000e+02],
[ 3.84620000e+00, 5.20000000e+01, 6.28185328e+00,
1.08108108e+00, 5.65000000e+02, 2.18146718e+00,
3.78500000e+01, -1.22250000e+02]])
array([4.526, 3.585, 3.521, 3.413, 3.422])
数据处理:训练集,测试集
from sklearn.model_selection import train_test_split
# 默认3:1,test_size=0.25
x_train_all, x_test, y_train_all, y_test = train_test_split(
housing.data, housing.target, random_state = 7)
x_train, x_valid, y_train, y_valid, = train_test_split(
x_train_all, y_train_all, random_state = 11)
print(x_train.shape, y_train.shape)
print(x_valid.shape, y_valid.shape)
print(x_test.shape, y_test.shape)
(11610, 8) (11610,)
(3870, 8) (3870,)
(5160, 8) (5160,)
归一化(标准化)处理
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_valid_scaled = scaler.transform(x_valid)
x_test_scaled = scaler.transform(x_test)
建立模型
model = keras.models.Sequential([
keras.layers.Dense(30, activation='relu',
input_shape=x_train.shape[1: ]),
keras.layers.Dense(1),
])
model.summary()
model.compile(loss="mean_squared_error", optimizer="sgd")
callbacks = [keras.callbacks.EarlyStopping(
patience=5, min_delta=1e-3)]
Model: "sequential_1"
_________________________________________________________________
Layer (type) Output Shape Param #
=================================================================
dense_2 (Dense) (None, 30) 270
_________________________________________________________________
dense_3 (Dense) (None, 1) 31
=================================================================
Total params: 301
Trainable params: 301
Non-trainable params: 0
_________________________________________________________________
模型训练
history = model.fit(x_train_scaled, y_train,
validation_data = (x_valid_scaled, y_valid),
epochs = 100,
callbacks = callbacks)
def plot_learning_curves(history):
pd.DataFrame(history.history).plot(figsize=(8, 5))
plt.grid(True)
plt.gca().set_ylim(0, 1)
plt.show()
plot_learning_curves(history)
验证
model.evaluate(x_test_scaled,y_test)
0s 29us/sample - loss: 0.5563
0.3711079419583313