导入相关包
from sklearn import preprocessing
import numpy as np
1. 标准化
标准化后,每一列(特征)数据的均值=0,标准差=1
# Standardization: preprocessing.scale centers the data to zero mean and
# scales it to unit standard deviation (column-wise by default).
samples = [
    [1, 3, 4],
    [2, 3, 0],
    [1, 2, 4],
]
X = np.array(samples)
X_scaled = preprocessing.scale(X)
print(X_scaled)
输出:
[[-0.70710678 0.70710678 0.70710678]
[ 1.41421356 0.70710678 -1.41421356]
[-0.70710678 -1.41421356 0.70710678]]
2. 正则化
# Normalization: rescale each sample (row) of X to unit norm.
X = np.array([[1, 3, 4],
              [2, 3, 0],
              [1, 2, 4]])
# `norm` selects the vector norm: one of {'l1', 'l2', 'max'}; 'l2' is the default.
X_normalized = preprocessing.normalize(X, norm='l2')
# Show the normalized matrix; printing X_scaled here (the standardization
# result) was a copy-paste mistake that hid the normalization output.
print(X_normalized)
输出:
[[0.19611614 0.58834841 0.78446454]
[0.5547002 0.83205029 0. ]
[0.21821789 0.43643578 0.87287156]]
3. 归一化
把数据映射到0~1范围之内
# Min-max scaling: fit a MinMaxScaler on X, then transform X with it
# (default feature range is 0 to 1).
X = np.array([
    [1, -1, 2],
    [2, 0, 0],
    [-1, 2, 4],
])
# fit() returns the fitted scaler itself, so it can be bound directly.
min_max_scaler = preprocessing.MinMaxScaler().fit(X)
X_train_minmax = min_max_scaler.transform(X)
print(X_train_minmax)
输出:
[[0.66666667 0. 0.5 ]
[1. 0.33333333 0. ]
[0. 1. 1. ]]