特征工程系列数据链接:https://pan.baidu.com/s/1ZUwOM206B-YUzaNzebi_cg 提取码:7w86
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
import numpy as np
import pandas as pd
# Print floating-point arrays in fixed-point form instead of scientific notation.
np.set_printoptions(suppress=True)
简单小数据
# Small single-column example frame; note that 19000 dwarfs every other value.
views = pd.DataFrame({'views': [1295.0, 25.0, 19000.0, 5.0, 1.0, 300.0]})
views
|   | views |
|---|---|
| 0 | 1295.0 |
| 1 | 25.0 |
| 2 | 19000.0 |
| 3 | 5.0 |
| 4 | 1.0 |
| 5 | 300.0 |
Standard Scaler
$$\frac{x_i - \mu}{\sigma}$$
# Z-score scaling: learn the column statistics first, then apply them to the
# same column and store the result next to the raw values.
ss = StandardScaler()
ss.fit(views[['views']])
views['zscore'] = ss.transform(views[['views']])
views
|   | views | zscore |
|---|---|---|
| 0 | 1295.0 | -0.307214 |
| 1 | 25.0 | -0.489306 |
| 2 | 19000.0 | 2.231317 |
| 3 | 5.0 | -0.492173 |
| 4 | 1.0 | -0.492747 |
| 5 | 300.0 | -0.449877 |
# Reproduce the first z-score by hand from the raw column values
# (NumPy's default mean and standard deviation).
vw = np.array(views.loc[:, 'views'])
(vw[0] - vw.mean()) / vw.std()
-0.30721413311687235
Min-Max Scaler
$$\frac{x_i - \min(x)}{\max(x) - \min(x)}$$
# Min-max scaling: learn the column's minimum and maximum, then rescale the
# same column and store the result as a new column.
mms = MinMaxScaler()
mms.fit(views[['views']])
views['minmax'] = mms.transform(views[['views']])
views
|   | views | zscore | minmax |
|---|---|---|---|
| 0 | 1295.0 | -0.307214 | 0.068109 |
| 1 | 25.0 | -0.489306 | 0.001263 |
| 2 | 19000.0 | 2.231317 | 1.000000 |
| 3 | 5.0 | -0.492173 | 0.000211 |
| 4 | 1.0 | -0.492747 | 0.000000 |
| 5 | 300.0 | -0.449877 | 0.015738 |
# Reproduce the first min-max value by hand: offset from the minimum divided by the range.
(vw[0] - vw.min()) / (vw.max() - vw.min())
0.068108847834096528
Robust Scaler
$$\frac{x_i - \mathrm{median}(x)}{\mathrm{IQR}_{(1,3)}(x)}$$
# Robust scaling: learn the column's median and interquartile range, then
# rescale the same column and store the result as a new column.
rs = RobustScaler()
rs.fit(views[['views']])
views['robust'] = rs.transform(views[['views']])
views
|   | views | zscore | minmax | robust |
|---|---|---|---|---|
| 0 | 1295.0 | -0.307214 | 0.068109 | 1.092883 |
| 1 | 25.0 | -0.489306 | 0.001263 | -0.132690 |
| 2 | 19000.0 | 2.231317 | 1.000000 | 18.178528 |
| 3 | 5.0 | -0.492173 | 0.000211 | -0.151990 |
| 4 | 1.0 | -0.492747 | 0.000000 | -0.155850 |
| 5 | 300.0 | -0.449877 | 0.015738 | 0.132690 |
# Reproduce the first robust-scaled value by hand.
# quartiles holds the 25th and 75th percentiles, in that order.
quartiles = np.percentile(vw, [25.0, 75.0])
# Interquartile range: upper quartile minus lower quartile.
iqr = quartiles[-1] - quartiles[0]
(vw[0] - np.median(vw)) / iqr
1.0928829915560916