# 数据标准化 Python代码

import pandas as pd
import numpy as np

datafile = '../data/normalization_data.xls'  # 参数初始化
data = pd.read_excel(datafile, header=None)


# 最小-最大规范化

$$x^* = \frac{x - min}{max - min}(new\_max - new\_min) + new\_min$$

>>> (data - data.min()) / (data.max() - data.min()) * (100 - 0) + 0	# 最小-最大规范化
            0           1           2           3
0    7.438017   93.729097   92.351974  100.000000
1   61.983471    0.000000    0.000000   85.094067
2   21.487603   11.956522   81.332237    0.000000
3    0.000000  100.000000  100.000000   56.367583
4  100.000000   94.230769   99.671053   80.414858
5   26.446281   83.862876   81.496711   90.931018
6   63.636364   84.698997   78.618421   92.957067


# 零-均值规范化

$$x^* = \frac{x - \bar{x}}{\sigma}$$

>>> (data - data.mean()) / data.std()  # 零-均值规范化
          0         1         2         3
0 -0.905383  0.635863  0.464531  0.798149
1  0.604678 -1.587675 -2.193167  0.369390
2 -0.516428 -1.304030  0.147406 -2.078279
3 -1.111301  0.784628  0.684625 -0.456906
4  1.657146  0.647765  0.675159  0.234796
5 -0.379150  0.401807  0.152139  0.537286
6  0.650438  0.421642  0.069308  0.595564


# 小数定标规范化

$$x^* = \frac{x}{10^k}$$

>>> data / 10 ** np.ceil(np.log10(data.abs().max()))  # 小数定标规范化
       0      1      2       3
0  0.078  0.521  0.602  0.2863
1  0.144 -0.600 -0.521  0.2245
2  0.095 -0.457  0.468 -0.1283
3  0.069  0.596  0.695  0.1054
4  0.190  0.527  0.691  0.2051
5  0.101  0.403  0.470  0.2487
6  0.146  0.413  0.435  0.2571

11-25 1万+

07-15 2534
10-14 1万+
10-25 6164
07-14 1915
10-17 246
06-16 2万+
11-20 2088
©️2020 CSDN 皮肤主题: 创作都市 设计师:CSDN官方博客