数据标准化处理
问题:
量纲不一:各特征的单位(量纲)不一致,数值无法直接放在一起比较
处理方法:
0-1标准化、Z标准化、normalizer归一化
案例:
# -*- coding: utf-8 -*-
import pandas
import numpy
# Load the sample CSV into a DataFrame (absolute Windows path, as in the notes).
data = pandas.read_csv(filepath_or_buffer='D:\\DATA\\pycase\\number2\\6.1\\data1.csv')
## Min-max (0-1) scaling
# MinMaxScaler linearly rescales a feature into its feature_range,
# which is (0, 1) by default.
from sklearn.preprocessing import MinMaxScaler
scaler = MinMaxScaler()
# scikit-learn rejects 1D input with
#   ValueError: Expected 2D array, got 1D array instead
# so each column's underlying array (.values, required on Python 3) is
# reshaped to (-1, 1): a single feature with one row per sample.
for scaled_name, raw_name in (
    ('0-1标准化票房', '票房'),
    ('0-1标准化评分', '评分'),
):
    column_2d = data[raw_name].values.reshape(-1, 1)
    # fit_transform re-fits the scaler on every call, so each column is
    # scaled using its own minimum and maximum.
    data[scaled_name] = scaler.fit_transform(column_2d)
# Z-score standardization
# scale() centers a feature on its mean and scales it to unit variance.
from sklearn.preprocessing import scale
for z_name, raw_name in (
    ('Z标准化票房', '票房'),
    ('Z标准化评分', '评分'),
):
    # Same 2D requirement as the other scikit-learn helpers: one feature column.
    feature_column = data[raw_name].values.reshape(-1, 1)
    data[z_name] = scale(feature_column)
## Normalizer scaling
# Normalizer rescales each sample (row) to unit norm, L2 by default.
from sklearn.preprocessing import Normalizer
scaler = Normalizer()
for norm_name, raw_name in (
    ('归一化票房', '票房'),
    ('归一化评分', '评分'),
):
    # Lay the whole column out as ONE row (1, -1) so that it is normalized
    # as a single vector, then take that row back out as a 1D result.
    single_row = data[raw_name].values.reshape(1, -1)
    normalized_rows = scaler.fit_transform(single_row)
    data[norm_name] = normalized_rows[0]