# 1. Convert text labels to integer ids, then convert the ids to one-hot vectors
import tensorflow as tf
from sklearn.preprocessing import LabelEncoder
# Raw category label of each sample; repeated labels are expected.
cats = [
    "股票", "母婴", "电子", "母婴", "母婴", "房产",
    "股票", "房产", "电子", "电商", "股票",
]

# Step 1: map every distinct label to an integer id.
# fit_transform learns the label set and encodes the samples in one call;
# LabelEncoder assigns ids following the sorted order of the distinct labels.
label_encoder = LabelEncoder()
ys_index = label_encoder.fit_transform(cats)
print(ys_index)
# Output (one id per entry of cats, 11 in total):
# [4 1 3 1 1 0 4 0 3 2 4]

# Step 2: one-hot encode the ids.
# The vector width comes from the fitted encoder rather than a hard-coded 5,
# so it stays correct when labels are added to or removed from cats.
num_classes = len(label_encoder.classes_)
ys_onehot = tf.keras.utils.to_categorical(ys_index, num_classes)
print(ys_onehot)
# Output (11 rows, one per sample; 5 columns, one per distinct label):
# [[0. 0. 0. 0. 1.]
#  [0. 1. 0. 0. 0.]
#  [0. 0. 0. 1. 0.]
#  [0. 1. 0. 0. 0.]
#  [0. 1. 0. 0. 0.]
#  [1. 0. 0. 0. 0.]
#  [0. 0. 0. 0. 1.]
#  [1. 0. 0. 0. 0.]
#  [0. 0. 0. 1. 0.]
#  [0. 0. 1. 0. 0.]
#  [0. 0. 0. 0. 1.]]