数据归一化
归一化(Normalization):将一列数据变换到某个固定区间(范围)内。通常这个区间是 [0, 1];广义地讲,也可以是其他区间:数据映射到 [0, 1] 之后,还可以再线性映射到其他范围,例如图像数据常映射到 [0, 255],其他场景下可能映射到 [-1, 1]。
1. 公式
$$X_{normalization} = \frac{X_i - X_{min}}{X_{max} - X_{min}}$$
2. 实现
- 自己实现:
def normalization(X):
    """Min-max scale every column of X into the range [0, 1].

    Each column is transformed as (X - min) / (max - min), where min and
    max are taken over the rows of that column.

    Parameters
    ----------
    X : ndarray
        2-D array with one sample per row and one feature per column.

    Returns
    -------
    X : the scaled data.
    min_ : the per-column minimum of the input.
    ranges : the per-column (max - min) of the input; zero for a column
        whose values are all equal.

    Notes
    -----
    A column with zero range is divided by 1 instead of 0, so it maps to
    0 rather than NaN.
    """
    min_ = X.min(axis=0)  # column-wise minimum
    max_ = X.max(axis=0)  # column-wise maximum
    ranges = max_ - min_
    # Guard against 0/0 on constant columns: their numerator is already 0,
    # so dividing by 1 leaves them at 0.
    divisor = np.where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column vectors to every row, which makes
    # an explicit np.tile copy unnecessary.
    X = (X - min_) / divisor
    return X, min_, ranges
- 调包实现
from sklearn.preprocessing import MinMaxScaler
def normalization2(X):
    """Min-max scale X using scikit-learn's MinMaxScaler.

    A new scaler is created with its default arguments, fitted on X, and
    the transformed data is returned.
    """
    scaler = MinMaxScaler()
    return scaler.fit_transform(X)
- 全部代码:
import pandas as pd
import numpy as np
from sklearn.preprocessing import MinMaxScaler
def normalization(X):
    """Min-max scale every column of X into the range [0, 1].

    Each column is transformed as (X - min) / (max - min), where min and
    max are taken over the rows of that column.

    Parameters
    ----------
    X : ndarray
        2-D array with one sample per row and one feature per column.

    Returns
    -------
    X : the scaled data.
    min_ : ndarray holding the per-column minimum of the input.
    ranges : ndarray holding the per-column (max - min) of the input;
        zero for a column whose values are all equal.

    Notes
    -----
    A column with zero range is divided by 1 instead of 0, so it maps to
    0 rather than NaN.
    """
    min_ = X.min(axis=0)  # column-wise minimum
    max_ = X.max(axis=0)  # column-wise maximum
    ranges = max_ - min_
    # Guard against 0/0 on constant columns: their numerator is already 0,
    # so dividing by 1 leaves them at 0.
    divisor = np.where(ranges == 0, 1, ranges)
    # Broadcasting applies the per-column vectors to every row, which makes
    # an explicit np.tile copy unnecessary.
    X = (X - min_) / divisor
    return X, np.array(min_), np.array(ranges)
def normalization2(X):
    """Min-max scale X using scikit-learn's MinMaxScaler.

    A new scaler is created with its default arguments, fitted on X, and
    the transformed data is returned.
    """
    scaler = MinMaxScaler()
    return scaler.fit_transform(X)
if __name__ == '__main__':
    # Small two-column sample table used to compare both implementations.
    columns = {0: [2, 1, 5, 4, 3],
               1: [8, 7, 10, 9, 6]}
    data = pd.DataFrame(columns)
    print(data)

    # Hand-written version: show the scaled data, then the per-column
    # minimum and range it returns.
    scaled, col_min, col_range = normalization(np.array(data))
    for item in (scaled, col_min, col_range):
        print(item)

    # scikit-learn version, fed the DataFrame directly.
    print(normalization2(data))