CIFAR-10:导入数据,截取部分样本,使用 kNN、SVM、PCA 进行分析

# -*- coding: utf-8 -*-
"""
Created on Wed May 27 22:56:16 2020

@author: guangjie2333

"""


"""

库调用

"""
import numpy as np
import pickle
import matplotlib.pyplot as plt
import PIL.Image as image
from sklearn.neighbors import KNeighborsClassifier
from sklearn import metrics
from sklearn.decomposition import PCA
from sklearn import svm
"""

内部函数

"""

#读文件的函数
def unpickle(file):
    """Load a pickled CIFAR-10 file and return the object stored in it.

    Args:
        file: Path of the pickle file to open (opened in binary mode).

    Returns:
        The unpickled object. The file is decoded with
        ``encoding='bytes'``, so strings pickled by Python 2 come back as
        ``bytes`` (which is why callers index the result with ``b'...'``
        keys).
    """
    # NOTE: pickle can execute arbitrary code while loading; only call this
    # on dataset files obtained from a trusted source.
    with open(file, 'rb') as fo:
        # The context manager closes the file; no explicit close() needed.
        return pickle.load(fo, encoding='bytes')



"""
主函数
"""

if __name__ == '__main__':

    # ------ Task 1: load the CIFAR-10 python batches ------
    # Alternative when Keras is available:
    # (x_train, y_train), (x_test, y_test) = cifar10.load_data()

    batches = unpickle('batches.meta')
    # Keys are bytes (b'...') because unpickle() loads with encoding='bytes'.
    name = batches[b'label_names']

    train_batches = [unpickle('data_batch_%d' % idx) for idx in range(1, 6)]
    test_batch = unpickle('test_batch')

    # Training data: stack the five batches row-wise
    # (axis=0 appends rows, axis=1 would append columns).
    x_train = np.concatenate([batch[b'data'] for batch in train_batches],
                             axis=0)
    y_train = np.concatenate([batch[b'labels'] for batch in train_batches],
                             axis=0)

    # Test data.
    x_test = test_batch[b'data']
    y_test = test_batch[b'labels']

    # ------ Task 2: show a few training samples of every class ------
    classifle = 10   # number of classes to draw (one subplot column each)
    picture_num = 5  # number of samples drawn per class (subplot rows)

    plt.figure(figsize=(5, 10))

    for classplot in range(classifle):
        label = name[classplot]
        if isinstance(label, bytes):
            # Decode so the title reads "airplane" rather than "b'airplane'".
            label = label.decode('utf-8')

        # Row indices of the first `picture_num` samples of this class.
        sample_rows = np.flatnonzero(y_train == classplot)[:picture_num]

        for row, sample in enumerate(sample_rows):
            sub = plt.subplot(picture_num, classifle,
                              row * classifle + classplot + 1)
            if row == 0:
                # Only the top row carries the class name.
                sub.set_title(label)
            sub.axis("off")
            # Each sample is reshaped to 3 planes of 32x32 (the planes are
            # used as R, G, B) and moved to channel-last order for imshow.
            sub.imshow(x_train[sample].reshape(3, 32, 32).transpose(1, 2, 0))

    # Without this call the figure is never displayed when run as a script.
    plt.show()

    # ------ Task 3: keep only the first 500 samples of each split ------
    subset_size = 500
    x_train = x_train[0:subset_size]
    y_train = y_train[0:subset_size]
    x_test = x_test[0:subset_size]
    y_test = y_test[0:subset_size]

    # ------ Task 4: 1-nearest-neighbour on the raw pixel vectors ------
    neigh = KNeighborsClassifier(n_neighbors=1)
    neigh.fit(x_train, y_train)
    y_test_predict = neigh.predict(x_test)

    # Accuracy on the test subset.
    print("Accuracy1", metrics.accuracy_score(y_test, y_test_predict))

    # ------ Task 5 ------
    # Author's note: after several trials n_neighbors=1 was chosen.

    # ------ Task 6: project onto the first two principal components ------
    # The PCA is fitted on the training subset only and then applied to both.
    pca = PCA(n_components=2)
    pca.fit(x_train)
    x_train_reduction = pca.transform(x_train)
    x_test_reduction = pca.transform(x_test)

    # ------ Task 7: 1-nearest-neighbour on the PCA-reduced data ------
    knn_pca = KNeighborsClassifier(n_neighbors=1)
    knn_pca.fit(x_train_reduction, y_train)
    y_test_predict = knn_pca.predict(x_test_reduction)

    # Accuracy on the test subset.
    print("Accuracy2", metrics.accuracy_score(y_test, y_test_predict))

    # ------ Task 8: RBF-kernel SVM on the PCA-reduced data ------
    # NOTE(review): gamma=0.5 is applied to unscaled PCA scores; if those
    # scores are large the RBF kernel may be close to zero between distinct
    # points. Consider standardising the features first -- TODO confirm.
    clf = svm.SVC(kernel='rbf', C=1000, gamma=0.5)
    clf.fit(x_train_reduction, y_train)
    y_test_predict = clf.predict(x_test_reduction)

    # Accuracy on the test subset.
    print("Accuracy3", metrics.accuracy_score(y_test, y_test_predict))

    # Author's observations:
    # PCA reduces the dimensionality and therefore speeds up computation, but
    # at the cost of accuracy. After the reduction the accuracy of both kNN
    # and the SVM dropped, the SVM's more so, which the author reads as kNN
    # being less sensitive to the dimensionality reduction than the SVM.













  • 1
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
好的,我会为您提供代码。但是,需要说明的是,PCA降维方法并不适合对图像进行分类。PCA主要用于数据降维和特征提取,而图像分类需要更复杂的卷积神经网络等技术。如果您想要了解更多有关图像分类的技术,可以咨询专业人士或学习相关的课程。

下面是使用PCA降维方法对cifar-10彩色图像数据进行分类的Python代码:

```python
# 导入必要的库
import matplotlib.pyplot as plt
import numpy as np
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
from keras.datasets import cifar10

# 加载CIFAR-10数据集
(x_train, y_train), (x_test, y_test) = cifar10.load_data()

# 将图像数据拉平成一维向量
x_train = x_train.reshape(x_train.shape[0], -1)
x_test = x_test.reshape(x_test.shape[0], -1)

# 对图像数据进行PCA降维
pca = PCA(n_components=100)
x_train = pca.fit_transform(x_train)
x_test = pca.transform(x_test)

# 对降维后的数据进行KNN分类
knn = KNeighborsClassifier(n_neighbors=5)
knn.fit(x_train, y_train)
accuracy = knn.score(x_test, y_test)
print('Accuracy:', accuracy)
```

上述代码中,首先使用`sklearn.decomposition`库的`PCA`类将彩色图像数据进行降维,然后使用`sklearn.neighbors`库的`KNeighborsClassifier`类对降维后的数据进行KNN分类。最后,输出分类精度。但是需要注意的是,由于PCA降维过程损失了图像的空间结构信息,因此,使用PCA进行特征提取的KNN分类器性能可能会比使用原始图像数据进行训练的分类器性能差很多。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

鄢广杰

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值