CIFAR-10：导入数据，截取部分样本，用 KNN、SVM、PCA 进行分析

最新推荐文章于 2024-03-05 01:36:13 发布

鄢广杰

最新推荐文章于 2024-03-05 01:36:13 发布

阅读量948

点赞数 1

分类专栏： python课堂实验

本文链接：https://blog.csdn.net/guangjie2333/article/details/106418860

版权

python课堂实验专栏收录该内容

13 篇文章 1 订阅

订阅专栏

# -*- coding: utf-8 -*-
"""
Created on Wed May 27 22:56:16 2020

@author: guangjie2333

"""


"""

库调用

"""
import numpy as np
import pickle
import matplotlib.pyplot as plt
import PIL.Image as image
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.decomposition import PCA
from sklearn import svm
"""

内部函数

"""

# Helper that reads one pickled file from disk.
def unpickle(file):
    """Load and return the object stored in a pickle file.

    Args:
        file: Path of the pickle file; it is opened in binary read mode.

    Returns:
        The unpickled object. Because ``encoding='bytes'`` is passed to
        ``pickle.load``, 8-bit strings written by Python 2 come back as
        ``bytes``, which is why callers index the result with keys such
        as ``b'data'``.

    NOTE: unpickling can execute arbitrary code, so only call this on
    files from a trusted source.
    """
    # The with-statement closes the file on exit; no explicit close needed.
    with open(file, 'rb') as fo:
        return pickle.load(fo, encoding='bytes')



""""
主函数
"""   

if __name__ == '__main__':

    # ------ Task 1: load the CIFAR-10 batch files ------
    # Every file is read from the current working directory.
    batches = unpickle('batches.meta')
    # Keys carry a b'' prefix because unpickle() loads with encoding='bytes'.
    name = batches.get(b'label_names')

    train_batches = [unpickle('data_batch_%d' % k) for k in range(1, 6)]
    test_batch = unpickle('test_batch')

    # Training data: axis=0 stacks the five batches below one another,
    # so every row of x_train is one image and y_train holds its label.
    x_train = np.concatenate([batch[b'data'] for batch in train_batches],
                             axis=0)
    y_train = np.concatenate([batch[b'labels'] for batch in train_batches],
                             axis=0)

    # Test data
    x_test = test_batch[b'data']
    y_test = test_batch[b'labels']

    # ------ Task 2: show the first few training images of every class ------
    classifle = 10   # number of classes to draw (one grid column per class)
    picture_num = 5  # images drawn per class (one grid row per image)

    plt.figure(figsize=(5, 10))

    for classplot in range(classifle):
        # Row indices of the first picture_num samples labelled classplot.
        # Slicing also copes with a class that has fewer samples than
        # picture_num, so no per-class counter has to be reset by hand.
        positions = np.flatnonzero(y_train == classplot)[:picture_num]

        for row, pos in enumerate(positions):
            sub = plt.subplot(picture_num, classifle,
                              row * classifle + classplot + 1)
            if row == 0:
                # Only the top row gets a title. Label names are loaded as
                # bytes, so decode them to avoid titles like "b'airplane'".
                label = name[classplot]
                if isinstance(label, bytes):
                    label = label.decode('utf-8')
                sub.set_title(label)

            sub.axis("off")
            # A row is reshaped into three 32x32 planes (R, G, B); moving the
            # channel axis last yields the (32, 32, 3) layout imshow expects.
            # Assumes the pixel values are 0-255 integers - TODO confirm.
            sub.imshow(x_train[pos].reshape(3, 32, 32).transpose(1, 2, 0))

    # ------ Task 3: keep only the first 500 samples of each split ------
    subset_size = 500
    x_train = x_train[:subset_size]
    y_train = y_train[:subset_size]
    x_test = x_test[:subset_size]
    y_test = y_test[:subset_size]

    # ------ Task 4: 1-nearest-neighbour on the raw pixels ------
    neigh = KNeighborsClassifier(n_neighbors=1)
    neigh.fit(x_train, y_train)
    y_test_predict = neigh.predict(x_test)

    # Accuracy on the test subset
    print("Accuracy1", metrics.accuracy_score(y_test, y_test_predict))

    # ------ Task 5 ------
    # After several trials the author settled on n_neighbors=1.

    # ------ Task 6: project onto the first two principal components ------
    # PCA is fitted on the training subset only and then applied to both.
    pca = PCA(n_components=2)
    pca.fit(x_train)
    x_train_reduction = pca.transform(x_train)
    x_test_reduction = pca.transform(x_test)

    # ------ Task 7: 1-nearest-neighbour on the PCA-reduced features ------
    knn_pca = KNeighborsClassifier(n_neighbors=1)
    knn_pca.fit(x_train_reduction, y_train)
    y_test_predict = knn_pca.predict(x_test_reduction)

    # Accuracy on the test subset
    print("Accuracy2", metrics.accuracy_score(y_test, y_test_predict))

    # ------ Task 8: RBF-kernel SVM on the PCA-reduced features ------
    clf = svm.SVC(kernel='rbf', C=1000, gamma=0.5)
    clf.fit(x_train_reduction, y_train)
    y_test_predict = clf.predict(x_test_reduction)

    # Accuracy on the test subset
    print("Accuracy3", metrics.accuracy_score(y_test, y_test_predict))

    # Author's conclusion: PCA reduces dimensionality and so speeds up the
    # computation, at the price of lower accuracy. After the reduction both
    # KNN and SVM got less accurate, SVM more so; the author reads this as KNN
    # being less sensitive to the reduction than SVM.
    # NOTE(review): this script only evaluates the SVM on PCA-reduced
    # features, so the SVM part of that comparison is not reproduced here.
    # gamma=0.5 on unscaled PCA features may also contribute to the low SVM
    # score - verify (e.g. by scaling the features) before relying on it.

    # Display the image grid last so the accuracies print first. Without
    # this call the figure is not shown when the file runs as a plain script
    # on a windowed backend.
    plt.show()

鄢广杰

关注

1
点赞
踩
2

收藏

觉得还不错? 一键收藏
打赏
0
评论
CIFAR-10：导入数据，截取部分样本，用 KNN、SVM、PCA 进行分析

# -*- coding: utf-8 -*-"""Created on Wed May 27 22:56:16 2020@author: guangjie2333""""""库调用"""import numpy as npimport pickleimport matplotlib.pyplot as pltimport PIL.Image as imagefrom sklearn.neighbors import KNeighborsClassifierfrom s
复制链接

扫一扫