1、基于sklearn
from numpy import array
from numpy import argmax
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import OneHotEncoder
# define example
data = ['cold', 'cold', 'warm', 'cold', 'hot', 'hot', 'warm', 'cold', 'warm', 'hot']
values = array(data)
print(values)
# integer encode
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(values)
print(integer_encoded)
# binary encode
onehot_encoder = OneHotEncoder(sparse=False)
integer_encoded = integer_encoded.reshape(len(integer_encoded), 1)
onehot_encoded = onehot_encoder.fit_transform(integer_encoded)
print(onehot_encoded)
# invert first example
inverted = label_encoder.inverse_transform([argmax(onehot_encoded[0, :])])
print(inverted)
2、手动
def to_categorical(y, num_classes=None):
y = np.array(y, dtype='int')
input_shape = y.shape
if input_shape and input_shape[-1] == 1 and len(input_shape) > 1:
input_shape = tuple(input_shape[:-1])
y = y.ravel()
if not num_classes:
num_classes = np.max(y) + 1
n = y.shape[0]
categorical = np.zeros((n, num_classes))
categorical[np.arange(n), y] = 1
output_shape = input_shape + (num_classes,)
categorical = np.reshape(categorical, output_shape)
return categorical
data = [1, 3, 2, 0, 3, 2, 2, 1, 0, 1]
encoded=to_categorical(data)
print(encoded)
'''
[[ 0. 1. 0. 0.]
[ 0. 0. 0. 1.]
[ 0. 0. 1. 0.]
[ 1. 0. 0. 0.]
[ 0. 0. 0. 1.]
[ 0. 0. 1. 0.]
[ 0. 0. 1. 0.]
[ 0. 1. 0. 0.]
[ 1. 0. 0. 0.]
[ 0. 1. 0. 0.]]
'''