# Demonstrates why a test set must be transformed with the scaler that was
# fitted on the training set: three side-by-side scatter plots show the raw
# data, the correctly scaled data, and a test set scaled by its own scaler.
import matplotlib.pyplot as plt
from sklearn.datasets import make_blobs
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import MinMaxScaler


def _scatter_split(ax, train, test, title, *, with_legend=False):
    """Draw one train/test split on ``ax`` and give the subplot a title.

    Args:
        ax: Matplotlib axes to draw on.
        train: 2-D array; columns 0 and 1 are plotted in blue.
        test: 2-D array; columns 0 and 1 are plotted as red triangles.
        title: Title text for the subplot.
        with_legend: When true, add a legend at ``loc=1``.
    """
    ax.scatter(train[:, 0], train[:, 1], c='b', label='Training set', s=60)
    ax.scatter(test[:, 0], test[:, 1], marker='^', c='r', label='Test set',
               s=60)
    if with_legend:
        ax.legend(loc=1)
    ax.set_title(title)


# Build the data: 60 samples around 5 centers, then hold out 10% as a test set.
X, _ = make_blobs(n_samples=60, centers=5, random_state=7, cluster_std=2)
X_train, X_test = train_test_split(X, random_state=9, test_size=0.1)

fig, axes = plt.subplots(nrows=1, ncols=3, figsize=(13, 4))

# Plot the training and test sets before any scaling.
_scatter_split(axes[0], X_train, X_test, 'Original Data', with_legend=True)

# Scale the data with MinMaxScaler: fit on the training set only and apply
# that same fitted scaler to both the training set and the test set.
scaler = MinMaxScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Visualize the correctly scaled data.
_scatter_split(axes[1], X_train_scaled, X_test_scaled, 'Scaled Data')

# Scale the test set separately with its own scaler. This is the mistake the
# third panel illustrates, so it is intentional here.
test_scaler = MinMaxScaler()
test_scaler.fit(X_test)
X_test_scaled_badly = test_scaler.transform(X_test)

# Visualize the improperly scaled data next to the scaled training set.
_scatter_split(axes[2], X_train_scaled, X_test_scaled_badly,
               'Improperly Scaled Data')

# Add axis labels to every subplot.
for ax in axes:
    ax.set_xlabel('Feature 0')
    ax.set_ylabel('Feature 1')

plt.show()