"""
@Title: Normalization
@Time: 2024/2/21
@Author: Michael Jie
"""
import numpy as np
from sklearn.preprocessing import MinMaxScaler, StandardScaler
"""
线性归一化、均值归一化:
1、最大最小值明确不变
2、数据相对稳定
3、对处理后的数据范围有严格要求
零-均值归一化:
1、在数据存在异常值、最大最小值不固定
非线性归一化(对数归一化、反正切函数归一化、小数定标标准化):
1、数据分化程度较大,需要数学函数对原始值进行映射
"""
# Linear (min-max) normalization: maps values onto [0, 1]; suited to
# data that is fairly concentrated (a single outlier squashes the rest).
def max_min_normalization(x):
    """Min-max normalize *x* onto [0, 1].

    :param x: sequence of numbers
    :return: list of floats, each (v - min) / (max - min)
    """
    x = np.array(x, dtype=float)
    lo, hi = np.min(x), np.max(x)  # hoisted: original recomputed np.min twice
    span = hi - lo
    if span == 0:
        # All values identical: the original formula divides by zero and
        # returns all-NaN. Return zeros instead (every value is at the min).
        return list(np.zeros_like(x))
    return list((x - lo) / span)
# Mean normalization: center on the mean, scale by the value range.
def mean_normalization(x):
    """Mean-normalize *x*: (v - mean) / (max - min).

    :param x: sequence of numbers
    :return: list of floats centered on 0
    """
    arr = np.array(x)
    spread = np.max(arr) - np.min(arr)
    centered = arr - np.mean(arr)
    return list(centered / spread)
# Z-score normalization (standardization): zero mean, unit standard
# deviation. Preferred when outliers are present or min/max are not fixed.
def z_score_normalization(x):
    """Standardize *x*: (v - mean) / std.

    :param x: sequence of numbers
    :return: list of floats with mean 0 and (population) std 1
    """
    arr = np.array(x)
    deviations = arr - np.mean(arr)
    return list(deviations / np.std(arr))
# Logarithmic normalization: compress widely spread positive values.
def log_normalization(x):
    """Log-normalize *x*: log(v) / log(max(x)).

    Requires strictly positive inputs (log of non-positive values is
    undefined) — same precondition as the original.

    :param x: sequence of positive numbers
    :return: list of floats; max(x) maps to 1.0
    """
    arr = np.array(x)
    denom = np.log(np.max(arr))
    return list(np.log(arr) / denom)
# Arctangent normalization: arctan maps R onto (-pi/2, pi/2); scaling by
# 2/pi maps the result onto (-1, 1).
def arctan_normalization(x):
    """Arctan-normalize *x*: arctan(v) * 2 / pi.

    :param x: sequence of numbers
    :return: list of floats in (-1, 1)
    """
    values = np.array(x)
    scale = 2 / np.pi
    return list(np.arctan(values) * scale)
# Decimal scaling normalization: divide by the smallest power of ten that
# brings every |value| below 1.
def decimal_normalization(x):
    """Decimal-scale *x*: v / 10**k for the smallest k with max(|v|)/10**k < 1.

    Bug fix: the original left k hard-coded to 0, making the function an
    identity transform; k must be derived from the data.

    :param x: sequence of numbers
    :return: list of floats, all with absolute value < 1 (zeros stay zero)
    """
    x = np.array(x, dtype=float)
    peak = np.max(np.abs(x)) if x.size else 0.0
    if peak == 0:
        # Empty or all-zero input: nothing to scale.
        return list(x)
    # k = number of integer digits of the largest magnitude.
    k = int(np.ceil(np.log10(peak)))
    if peak / (10 ** k) >= 1:
        # Exact powers of ten (e.g. peak == 100) need one more digit.
        k += 1
    return list(x / (10 ** k))
if __name__ == '__main__':
    datas = [1, 0.7, 2.4, 2.1, 1.5, 2.7, 0.9, 1.6, 0.5, 10]
    # The outlier (10) squashes min-max results toward 0.
    print("线性归一化:{x}".format(x=max_min_normalization(datas)))
    print("均值归一化:{x}".format(x=mean_normalization(datas)))
    print("零-均值归一化(标准化):{x}".format(x=z_score_normalization(datas)))
    # sklearn scalers require a 2-D array of shape (n_samples, n_features);
    # passing the flat list raised "ValueError: Expected 2D array", so
    # reshape the data into a single feature column first.
    column = np.array(datas).reshape(-1, 1)
    # Linear normalization via sklearn
    min_max_scaler = MinMaxScaler()          # instantiate
    result = min_max_scaler.fit_transform(column)   # normalize
    min_max_scaler.inverse_transform(result)        # invert the transform
    # Z-score normalization (standardization) via sklearn
    standard_scaler = StandardScaler()
    result = standard_scaler.fit_transform(column)
    standard_scaler.inverse_transform(result)
"""
线性归一化:[0.05263157894736842, 0.021052631578947364, 0.19999999999999998, 0.16842105263157894, 0.10526315789473684,
0.23157894736842108, 0.042105263157894736, 0.11578947368421054, 0.0, 1.0]
均值归一化:[-0.1410526315784735, -0.172631578947684, 0.006315789736842165, -0.02526315789736817, -0.0884210263157893,
0.037894736842105294, -0.15157894736842104, -0.07789473684210524, -0.19368421052631576, 0.8063157894736842]
零-均值归一化(标准化):[-0.5062410229091, -0.6195785499784, 0.02266750858865, -0.0907003336553, -0.313451184177938,
0.1360050507504832, -0.5440202030019322, -0.2795659376537707, -0.6951369260580246, 2.893885246524168]
"""