# Naive Bayes on simulated discrete data: training, evaluation, and prediction
# import package
import random
import numpy as np
from sklearn.naive_bayes import CategoricalNB
from sklearn.model_selection import train_test_split
# Data preparation: 600 samples x 100 categorical features, each value in 0..4.
rng = np.random.RandomState(1)
X = rng.randint(5, size=(600, 100))
# Balanced labels: classes 1..6 repeated 100 times (600 labels in total).
y = np.array([1, 2, 3, 4, 5, 6] * 100)
# Append y as the last column so each row keeps its label while shuffling.
data = np.c_[X, y]
# Shuffle the rows in place with NumPy's shuffle, NOT the stdlib random.shuffle.
# random.shuffle swaps items via `x[i], x[j] = x[j], x[i]`; on a 2-D ndarray
# data[i] and data[j] are views, so the swap overwrites one row with the other
# and silently duplicates/drops rows. Using the seeded rng also makes the row
# order reproducible from run to run.
rng.shuffle(data)
X = data[:, :-1]  # all columns except the last: features
y = data[:, -1]  # last column: labels
print('X:\n', X)
print('y:\n', y)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=0
)
# Train
# alpha=1 is Laplace (add-one) smoothing: category/class combinations that
# never occur in the training data still get a non-zero probability.
clf = CategoricalNB(alpha=1)
clf.fit(X_train, y_train)
# Evaluate
# NOTE: X is generated independently of y, so the accuracy is expected to sit
# near chance level (about 1/6 for six balanced classes).
acc = clf.score(X_test, y_test)
print('test acc : %.3f' % acc)
# Predict a single new sample
x = rng.randint(5, size=(1, 100))
print(x)
print(clf.predict_proba(x))
print(clf.predict(x))