# Demonstrates one-hot encoding of a categorical column in two ways:
# scikit-learn's OneHotEncoder and pandas' get_dummies.
import pandas as pd
from sklearn.preprocessing import OneHotEncoder

# Small example frame mixing categorical ('color', 'size', 'type') and
# numeric ('price') columns.
df = pd.DataFrame(
    [
        ['red', 'M', 10.1, 'type1'],
        ['blue', 'L', 12.5, 'type1'],
        ['white', 'XL', 13.3, 'type2'],
    ],
    columns=['color', 'size', 'price', 'type'],
)

# Feature matrix as a NumPy array; column 0 holds the colors.
X = df[['color', 'size', 'price']].values
color_ohe = OneHotEncoder()

# First, inspect the raw color column (a 1-D array).
color_values = X[:, 0]
print(color_values, "X[:, 0]")

# OneHotEncoder expects 2-D input of shape (n_samples, n_features), so
# reshape the single column into a column vector.
color_column = color_values.reshape(-1, 1)
print(color_column, "X[:, 0].reshape(-1, 1)")

# fit_transform returns a sparse matrix by default; toarray() makes it
# dense so the encoded rows can be printed.
print(color_ohe.fit_transform(color_column).toarray())
print("fit transform")
# sklearn result

# Use the pandas get_dummies method to build the one-hot (dummy) columns.
df = df[['color', 'size', 'price']]
df_dump = pd.get_dummies(df, columns=['color'])
print(df_dump)

# Remove the redundant column: with drop_first=True the first category's
# dummy column is dropped, since it is implied by the remaining ones.
df_dump = pd.get_dummies(df, columns=['color'], drop_first=True)
print(df_dump)
# result: