#导入库
import numpy as np
import matplotlib.pyplot as plt
from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
# Basic data handling: load the digits dataset and inspect it in tabular form.
import pandas as pd

digits = load_digits()  # bundled dataset exposing .data, .images and .target
# Bare expressions are silently discarded when this file runs as a script,
# so every inspection step below is printed explicitly.
print("data shape:", digits.data.shape)

# Convert the feature matrix to a DataFrame.
features_df = pd.DataFrame(digits.data)
print(features_df)

# Features and labels side by side. The label column gets an explicit name so
# it does not collide with feature column 0.
labelled_df = pd.concat(
    (features_df, pd.Series(digits.target, name="target")),
    axis=1,
)
print(labelled_df)
# Modelling: fit a Gaussian naive Bayes classifier on a 70/30 split.
from sklearn.metrics import confusion_matrix as CM

X, y = digits.data, digits.target
Xtrain, Xtest, Ytrain, Ytest = train_test_split(
    X, y, test_size=0.3, random_state=420
)
# Inspection values are printed; as bare expressions they would be discarded
# when the file is executed as a script.
print("training labels shape:", Ytrain.shape)
gnb = GaussianNB().fit(Xtrain, Ytrain)

# Classification accuracy on the held-out test split.
acc_score = gnb.score(Xtest, Ytest)
print("accuracy:", acc_score)

# Predicted labels for the test split.
Y_pred = gnb.predict(Xtest)
print("predictions shape:", Y_pred.shape)

# Predicted class probabilities for the test split.
prob = gnb.predict_proba(Xtest)
print("probabilities of first test sample:", prob[0, :])  # one column per label
print("sum of that row:", prob[0, :].sum())  # each row sums to one

# Confusion matrix of true labels against predicted labels.
print(CM(Ytest, Y_pred))
# Exercise 1: show the test results graphically
import matplotlib.pyplot as plt
import seaborn as sns
# Show 20 test images, each annotated with its true and predicted class.
image_shape = digits.images.shape[1:]  # (rows, cols) of a single image

fig, axes = plt.subplots(
    4,
    5,
    figsize=(20, 10),
    subplot_kw={'xticks': [], 'yticks': []},
    gridspec_kw=dict(hspace=0.1, wspace=0.1),
)
for ax, pixels, true_label, pred_label in zip(axes.flat, Xtest, Ytest, Y_pred):
    # Draw the actual test sample. Indexing digits.images by the label value
    # would show one of the first ten dataset images instead of this sample.
    ax.imshow(pixels.reshape(image_shape), cmap='binary',
              interpolation='nearest')
    # True class in the upper-left corner of the image.
    ax.text(0.05, 0.75, str(true_label),
            transform=ax.transAxes, color='black', fontsize=32)
    # Predicted class in the lower-left corner: green if correct, red if not.
    ax.text(0.05, 0.05, str(pred_label),
            transform=ax.transAxes,
            color='green' if true_label == pred_label else 'red',
            fontsize=32)
plt.show()
# Exercise 2: show the prediction results as a heatmap
import matplotlib as mpl
# Heatmap of the confusion matrix: rows are true labels, columns predictions.
mat = CM(Ytest, Y_pred)
plt.figure(figsize=(12, 10), dpi=100)
sns.heatmap(
    mat,
    square=True,
    annot=True,
    fmt='d',  # integer counts; the default '.2g' shows counts >= 100 as 1e+02
    cbar=False,
    cmap='Reds',
    center=20,
)
plt.xlabel('predicted value')
plt.ylabel('true value')
# NOTE(review): the figure is a confusion matrix, not a correlogram; the title
# text is kept unchanged here -- confirm whether it should be renamed.
plt.title('Correlogram of digits', fontsize=22)
plt.xticks(fontsize=12)
plt.yticks(fontsize=12)
plt.show()