Python 中使用 K 近邻分类器(无参数训练)对鸢尾花(iris)数据进行类别预测,并将分类报告可视化

# --- Train a K-Nearest Neighbors classifier on the iris dataset ---
from sklearn.datasets import load_iris

iris = load_iris()
# iris.data is a (150, 4) feature matrix; print the bundled dataset description.
print(iris.DESCR)

# NOTE: sklearn.cross_validation was removed in scikit-learn 0.20;
# train_test_split lives in sklearn.model_selection.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, test_size=0.25, random_state=33)

from sklearn.preprocessing import StandardScaler
from sklearn.neighbors import KNeighborsClassifier

# Standardize features: fit the scaler on the training split only, then
# reuse its statistics on the test split (never refit on test data).
ss = StandardScaler()
X_train = ss.fit_transform(X_train)
X_test = ss.transform(X_test)

# KNN with default hyperparameters (k=5, uniform weights).
knc = KNeighborsClassifier()
knc.fit(X_train, y_train)
y_predict = knc.predict(X_test)
print('The accuracy of K-Nearest Neighbor Classifier is', knc.score(X_test, y_test))

from sklearn.metrics import classification_report
print(classification_report(y_test, y_predict, target_names=iris.target_names))


from matplotlib import pyplot as plt
import numpy as np 

def show_values(pc, fmt="%.2f", **kw):
    '''
    Overlay the numeric value of each cell on a pcolor heatmap.

    Parameters
    ----------
    pc : collection returned by ``ax.pcolor`` (a PolyCollection/QuadMesh)
    fmt : %-style format string used to render each cell value
    **kw : extra keyword arguments forwarded to ``ax.text``

    Source: https://stackoverflow.com/a/25074150/395857 (by HYRY)
    '''
    # The original code rebound the builtin `zip` to itertools.izip via a
    # `global zip` hack for Python 2. On Python 3 the builtin zip is already
    # lazy, so that hack is unnecessary and only polluted the module globals.
    pc.update_scalarmappable()
    ax = pc.axes
    for path, face_color, value in zip(pc.get_paths(), pc.get_facecolors(), pc.get_array()):
        # Cell centre: mean of the quad's corner vertices (the last two
        # path vertices just close the polygon, so they are excluded).
        x, y = path.vertices[:-2, :].mean(0)
        # Black text on light cells, white text on dark cells for contrast.
        if np.all(face_color[:3] > 0.5):
            text_color = (0.0, 0.0, 0.0)
        else:
            text_color = (1.0, 1.0, 1.0)
        ax.text(x, y, fmt % value, ha="center", va="center", color=text_color, **kw)


def cm2inch(*tupl):
    '''
    Convert figure dimensions from centimetres to inches for matplotlib.

    Accepts either separate values ``cm2inch(w, h)`` or a single tuple
    ``cm2inch((w, h))`` and returns a tuple of inches.

    Source: https://stackoverflow.com/a/22787457/395857 (by gns-ank)
    '''
    inch = 2.54  # centimetres per inch
    # isinstance instead of type-comparison (PEP 8); the extra truthiness
    # check also makes a no-argument call return () instead of raising.
    if tupl and isinstance(tupl[0], tuple):
        return tuple(i / inch for i in tupl[0])
    return tuple(i / inch for i in tupl)


def heatmap(AUC, title, xlabel, ylabel, xticklabels, yticklabels, figure_width=40, figure_height=20, correct_orientation=False, cmap='RdBu'):
    '''
    Draw an annotated heatmap of a 2-D array with labelled cells.

    Parameters
    ----------
    AUC : 2-D numpy array of values to plot (rows = classes, cols = metrics)
    title, xlabel, ylabel : figure text
    xticklabels, yticklabels : tick labels for columns / rows
    figure_width, figure_height : figure size in centimetres (see cm2inch)
    correct_orientation : if True, put the origin at the top-left
    cmap : matplotlib colormap name

    Inspired by:
    - https://stackoverflow.com/a/16124677/395857
    - https://stackoverflow.com/a/25074150/395857
    '''
    # Plot it out
    fig, ax = plt.subplots()
    c = ax.pcolor(AUC, edgecolors='k', linestyle='dashed', linewidths=0.2, cmap=cmap)

    # Put the major ticks at the middle of each cell.
    ax.set_yticks(np.arange(AUC.shape[0]) + 0.5, minor=False)
    ax.set_xticks(np.arange(AUC.shape[1]) + 0.5, minor=False)

    # Set tick labels.
    ax.set_xticklabels(xticklabels, minor=False)
    ax.set_yticklabels(yticklabels, minor=False)

    # Set title and x/y labels.
    plt.title(title)
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)

    # Remove last blank column.
    plt.xlim((0, AUC.shape[1]))

    # Hide the tick marks. The original set t.tick1On/t.tick2On per tick,
    # but those attributes were deprecated and removed in Matplotlib 3.x;
    # tick_params is the supported API and does the same thing.
    ax = plt.gca()
    ax.tick_params(axis='both', which='major',
                   bottom=False, top=False, left=False, right=False)

    # Add color bar.
    plt.colorbar(c)

    # Add the numeric value as text in each cell.
    show_values(c)

    # Proper orientation (origin at the top left instead of bottom left).
    if correct_orientation:
        ax.invert_yaxis()
        ax.xaxis.tick_top()

    # Resize the figure (cm -> inches).
    fig = plt.gcf()
    fig.set_size_inches(cm2inch(figure_width, figure_height))



def plot_classification_report(classification_report, title='Classification report ', cmap='RdBu'):
    '''
    Render a scikit-learn text classification report as a heatmap.

    Parameters
    ----------
    classification_report : str, the text produced by
        sklearn.metrics.classification_report
    title : figure title
    cmap : matplotlib colormap name

    Extension based on https://stackoverflow.com/a/31689645/395857
    '''
    lines = classification_report.split('\n')

    plotMat = []      # per-class [precision, recall, f1-score]
    support = []      # per-class sample counts
    class_names = []
    # Skip the two header lines and the trailing summary lines.
    for line in lines[2: (len(lines) - 2)]:
        t = line.strip().split()
        # A per-class row is "<name> <precision> <recall> <f1> <support>"
        # (5 tokens). Anything shorter -- blank lines, or the bare
        # "accuracy" row emitted by scikit-learn >= 0.20 -- cannot supply
        # all three metrics and would make plotMat ragged, so skip it.
        if len(t) < 5:
            continue
        class_names.append(t[0])
        plotMat.append([float(x) for x in t[1: len(t) - 1]])
        support.append(int(t[-1]))

    print('plotMat: {0}'.format(plotMat))
    print('support: {0}'.format(support))

    xlabel = 'Metrics'
    ylabel = 'Classes'
    xticklabels = ['Precision', 'Recall', 'F1-score']
    yticklabels = ['{0} ({1})'.format(class_names[idx], sup) for idx, sup in enumerate(support)]
    figure_width = 25
    # Grow the figure with the number of classes, plus room for decorations.
    figure_height = len(class_names) + 7
    correct_orientation = False
    heatmap(np.array(plotMat), title, xlabel, ylabel, xticklabels, yticklabels, figure_width, figure_height, correct_orientation, cmap=cmap)

# Pass the classification-report text produced above into the plotting helper.
def main():
    '''Build the classification report for the fitted KNN model and save it as a PNG heatmap.'''
    report_text = classification_report(y_test, y_predict, target_names=iris.target_names)
    plot_classification_report(report_text)
    plt.savefig('knear_neighbor_report.png', dpi=200, format='png', bbox_inches='tight')
    plt.close()


if __name__ == "__main__":
    main()
    # cProfile.run('main()')  # uncomment if you want to do some profiling



预测结果如下:
The accuracy of K-Nearest Neighbor Classifier is 0.8947368421052632
             precision    recall  f1-score   support

     setosa       1.00      1.00      1.00         8
 versicolor       0.73      1.00      0.85        11
  virginica       1.00      0.79      0.88        19

avg / total       0.92      0.89      0.90        38
相应的报表如下
 
 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
在使用scikit-learn软件包对mnist数据进行分类之前,我们首先需要导入相关的库和数据集。 ```python from sklearn.datasets import fetch_openml from sklearn.model_selection import train_test_split from sklearn.neighbors import KNeighborsClassifier from sklearn.tree import DecisionTreeClassifier from sklearn.svm import SVC from sklearn.naive_bayes import GaussianNB from sklearn.linear_model import LogisticRegression import matplotlib.pyplot as plt # 导入mnist数据集 X, y = fetch_openml('mnist_784', version=1, return_X_y=True) ``` 接下来,我们将数据集分为训练集和测试集,其80%的数据用于训练模型,20%的数据用于评估模型的准确性。 ```python # 将数据集分为训练集和测试集 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) ``` 接下来,我们使用不同的分类器对mnist数据进行分类,并对结果进行可视化。 1. K近邻算法(K-Nearest Neighbors): ```python knn = KNeighborsClassifier() knn.fit(X_train, y_train) accuracy = knn.score(X_test, y_test) print("K近邻算法准确率:", accuracy) ``` 2. 决策树算法(Decision Tree): ```python tree = DecisionTreeClassifier() tree.fit(X_train, y_train) accuracy = tree.score(X_test, y_test) print("决策树算法准确率:", accuracy) ``` 3. 支持向量机算法(Support Vector Machine): ```python svm = SVC() svm.fit(X_train, y_train) accuracy = svm.score(X_test, y_test) print("支持向量机算法准确率:", accuracy) ``` 4. 朴素贝叶斯算法(Naive Bayes): ```python nb = GaussianNB() nb.fit(X_train, y_train) accuracy = nb.score(X_test, y_test) print("朴素贝叶斯算法准确率:", accuracy) ``` 5. 
逻辑回归算法(Logistic Regression): ```python lr = LogisticRegression() lr.fit(X_train, y_train) accuracy = lr.score(X_test, y_test) print("逻辑回归算法准确率:", accuracy) ``` 最后,我们可以使用Matplotlib库将测试集的一些样本可视化,以便更直观地观察分类结果。 ```python # 随机选择测试集的一些样本进行可视化 random_indices = np.random.choice(range(len(X_test)), size=36, replace=False) selected_images = X_test[random_indices] selected_labels = y_test[random_indices] # 创建子图,并在每个子图上显示样本的图像和标签 n_rows = 6 n_cols = 6 fig, axes = plt.subplots(n_rows, n_cols, figsize=(10, 10)) for i, ax in enumerate(axes.flatten()): image = selected_images[i].reshape(28, 28) ax.imshow(image, cmap='gray') ax.axis('off') ax.set_title(f"Label: {selected_labels[i]}") plt.tight_layout() plt.show() ``` 以上就是基于scikit-learn软件包对mnist数据进行分类,并可视化的过程,其尝试了K近邻算法、决策树算法、支持向量机算法、朴素贝叶斯算法和逻辑回归算法。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值