import pandas as pd
import numpy as np
datafile = '../data/normalization_data.xls' # path of the Excel workbook holding the data to normalize
data = pd.read_excel(datafile, header=None) # load the workbook; header=None treats the first row as data, so columns get integer labels
最小-最大规范化
$$x^*=\frac{x-min}{max-min}(new\_max-new\_min)+new\_min$$
映射到 $0-100$ 区间：
>>> (data - data.min()) / (data.max() - data.min()) * (100 - 0) + 0 # min-max normalization: new_max = 100, new_min = 0
0 1 2 3
0 7.438017 93.729097 92.351974 100.000000
1 61.983471 0.000000 0.000000 85.094067
2 21.487603 11.956522 81.332237 0.000000
3 0.000000 100.000000 100.000000 56.367583
4 100.000000 94.230769 99.671053 80.414858
5 26.446281 83.862876 81.496711 90.931018
6 63.636364 84.698997 78.618421 92.957067
零-均值规范化
$$x^*=\frac{x-\bar{x}}{\sigma}$$
>>> (data - data.mean()) / data.std() # zero-mean (z-score) normalization; pandas std() defaults to the sample standard deviation (ddof=1)
0 1 2 3
0 -0.905383 0.635863 0.464531 0.798149
1 0.604678 -1.587675 -2.193167 0.369390
2 -0.516428 -1.304030 0.147406 -2.078279
3 -1.111301 0.784628 0.684625 -0.456906
4 1.657146 0.647765 0.675159 0.234796
5 -0.379150 0.401807 0.152139 0.537286
6 0.650438 0.421642 0.069308 0.595564
小数定标规范化
$$x^*=\frac{x}{10^k}$$
>>> data / 10 ** np.ceil(np.log10(data.abs().max())) # decimal-scaling normalization: k = ceil(log10(max |x|)), computed from each column's maximum absolute value
0 1 2 3
0 0.078 0.521 0.602 0.2863
1 0.144 -0.600 -0.521 0.2245
2 0.095 -0.457 0.468 -0.1283
3 0.069 0.596 0.695 0.1054
4 0.190 0.527 0.691 0.2051
5 0.101 0.403 0.470 0.2487
6 0.146 0.413 0.435 0.2571