# Several methods of data preprocessing
import numpy as np
from sklearn import preprocessing
# Sample input: a 3x4 matrix of floats used by every preprocessing step below.
data = np.array([
    [3.0, -1.5, 2.0, -5.4],
    [0.0, 4.0, -0.3, 2.1],
    [1.0, 3.3, -1.9, -4.3],
])
# Mean removal: standardize the data with preprocessing.scale, then print the
# per-column (axis=0) mean and standard deviation of the result so the effect
# of the scaling can be inspected.
data_standardized = preprocessing.scale(data)
column_means = data_standardized.mean(axis=0)
column_stds = data_standardized.std(axis=0)
print("\nMean =", column_means)
print("Std deviation =", column_stds)
# Scale the features into a fixed range, here [0, 1].
# The scaler is fitted on `data` first and then applied to the same data.
data_scaler = preprocessing.MinMaxScaler(feature_range=(0, 1))
data_scaler.fit(data)
data_scaled = data_scaler.transform(data)
print("\nMin max scaled data:\n", data_scaled)
# Normalization using the L1 norm.
# axis=1 is the library default (normalize each row); it is spelled out here
# only for readability.
data_normalized = preprocessing.normalize(data, norm="l1", axis=1)
print("\nL1 normalized data:\n", data_normalized)
# Feature binarization: threshold the data at 1.4.
# The Binarizer is used without a prior fit() call, exactly as before.
binarizer = preprocessing.Binarizer(threshold=1.4)
data_binarized = binarizer.transform(data)
print("\nBinarized data:\n", data_binarized)
# 分类特征编码
encoder = preprocessin