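## Data preparation before modelling: min-max scale the numeric variables and label-encode the discrete variables
## df_data: the data set, as a DataFrame
## label_name: name of the target variable (column)
## unique_id: name of the column that uniquely identifies each row of the data set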
def Data_standarder(df_data, label_name, unique_id):
    """Split a DataFrame into model-ready features and the target column.

    The ID and target columns are removed from the features. Every column in
    the first result of ``distinguish_Char_Num`` is rescaled with a freshly
    fitted ``MinMaxScaler`` (default range [0, 1]); every column in its second
    result is replaced by the integer codes of a freshly fitted
    ``LabelEncoder``. This is label encoding, not one-hot encoding: no new
    columns are created.

    The fitted scalers/encoders are discarded, so the same mapping cannot be
    re-applied to another data set afterwards.

    Args:
        df_data: pandas DataFrame containing the feature columns plus the
            target and ID columns.
        label_name: column name of the target variable.
        unique_id: column name of the row identifier.

    Returns:
        A tuple ``(feature_X, label)``: ``feature_X`` is a new DataFrame
        without the ID/target columns and with the transformed values;
        ``label`` is ``df_data[label_name]``.
    """
    # Imported locally, as the original did, so the module itself can be
    # imported without scikit-learn being loaded.
    from sklearn.preprocessing import LabelEncoder, MinMaxScaler

    # Target column, returned as-is.
    label = df_data[label_name]

    # drop() without inplace=True already returns a new DataFrame, so no extra
    # deep copy is needed to keep the caller's frame untouched.
    feature_X = df_data.drop([unique_id, label_name], axis=1)

    # NOTE(review): distinguish_Char_Num is defined outside this block;
    # presumably it returns (numeric column names, categorical column names)
    # -- confirm against its definition.
    numeric_cols, categorical_cols = distinguish_Char_Num(feature_X)

    # Scale each numeric column independently. The scaler expects a 2-D
    # (n_samples, 1) array; ravel() flattens the result back to one column.
    for col_name in numeric_cols:
        column_2d = feature_X[col_name].values.reshape(-1, 1)
        feature_X[col_name] = MinMaxScaler().fit_transform(column_2d).ravel()

    # Replace each categorical column by integer class codes.
    for col_name in categorical_cols:
        feature_X[col_name] = LabelEncoder().fit_transform(feature_X[col_name])

    return feature_X, label
PYTHON对数值变量进行标准化,离散变量标签化
于 2021-02-08 10:37:30 首次发布