# One-hot encode X_data and convert the encoder's output to a dense matrix.
# NOTE(review): X_data is not defined anywhere in the visible source, and
# OneHotEncoder is only imported further down - this line looks like a
# standalone usage snippet; confirm before running it as part of the script.
X = OneHotEncoder().fit_transform(X_data).todense()  # one-hot encoding
import numpy as np
import pandas as pd
from sklearn.preprocessing import OneHotEncoder
def oneHot(df):
    """One-hot encode every column of ``df``.

    Each input column is expanded into one indicator column per distinct
    value found in it. Output columns are labelled
    ``'<column>_onehot_<value>'``, where ``<value>`` is the lower-cased
    string form of the category.

    Args:
        df: ``pandas.DataFrame`` whose columns are all treated as
            categorical features.

    Returns:
        A new ``pandas.DataFrame`` of 0.0/1.0 indicators with a default
        integer index and one column per (input column, category) pair.
    """
    # categories='auto' derives the categories from the unique values of each
    # column instead of the legacy range [0, max(value)] used for integers,
    # which also silences the associated FutureWarning on older scikit-learn.
    encoder = OneHotEncoder(categories='auto')
    # Fit and transform on the same array so the encoder sees identical input
    # in both steps.
    encoded = encoder.fit_transform(df.values)

    # Build the labels from the encoder's own category order. Sorting the
    # stringified values instead would mislabel columns whenever lexicographic
    # and value order differ (e.g. '10' < '2' but 2 < 10).
    new_cols = [
        '{0}_onehot_{1}'.format(old_col, str(category).lower())
        for old_col, categories in zip(df.columns, encoder.categories_)
        for category in categories
    ]
    return pd.DataFrame(encoded.toarray(), columns=new_cols)


if __name__ == '__main__':
    # Small demo: a 4x6 frame of distinct integers, before and after encoding.
    df = pd.DataFrame(np.arange(24).reshape(4, 6))
    print(df)
    print(oneHot(df))
    0   1   2   3   4   5
0   0   1   2   3   4   5
1   6   7   8   9  10  11
2  12  13  14  15  16  17
3  18  19  20  21  22  23
FutureWarning: The handling of integer data will change in version 0.22. Currently, the categories are determined based on the range [0, max(values)], while in the future they will be determined based on the unique values.
If you want the future behaviour and silence this warning, you can specify "categories='auto'".
In case you used a LabelEncoder before this OneHotEncoder to convert the categories to integers, then you can now use the OneHotEncoder directly.
warnings.warn(msg, FutureWarning)
     0    1    2    3    4    5    6   ...   17   18   19   20   21   22   23
0  1.0  0.0  0.0  0.0  1.0  0.0  0.0  ...  0.0  0.0  0.0  1.0  0.0  0.0  0.0
1  0.0  1.0  0.0  0.0  0.0  1.0  0.0  ...  1.0  0.0  0.0  0.0  1.0  0.0  0.0
2  0.0  0.0  1.0  0.0  0.0  0.0  1.0  ...  0.0  1.0  0.0  0.0  0.0  1.0  0.0
3  0.0  0.0  0.0  1.0  0.0  0.0  0.0  ...  0.0  0.0  1.0  0.0  0.0  0.0  1.0

[4 rows x 24 columns]