# Part 1: 准备工作 (Preparation)
import numpy as np
from PIL import Image
import matplotlib.pyplot as plt # 绘图库
import matplotlib as mpl
mpl.rcParams['font.sans-serif'] = ['SimHei'] # Font used so Chinese text renders in plots; author's listed options: SimHei, FangSong, KaiTi, YouYuan
# Paths to the CIFAR-100 (python version) files: training split, metadata, test split.
# NOTE(review): these are absolute, machine-specific paths; adjust them before running elsewhere.
file1 = 'E:/Pycharm_projects/cifar-100-python/train'
file2 = 'E:/Pycharm_projects/cifar-100-python/meta'
file3 = 'E:/Pycharm_projects/cifar-100-python/test'
def unpickle(file):
    """Load one of the pickled CIFAR-100 files into a Python object.

    Args:
        file: Path of the pickle file to read.

    Returns:
        The unpickled object (the callers in this script treat it as a dict).

    Raises:
        OSError: If the file cannot be opened.
        pickle.UnpicklingError: If the content is not a valid pickle.

    Note:
        ``pickle.load`` can execute arbitrary code while loading; only call
        this on files obtained from a trusted source.
    """
    import pickle

    # The context manager closes the handle even when unpickling raises.
    with open(file, 'rb') as fo:
        # ``encoding`` controls how 8-bit strings written by Python 2 are
        # decoded; 'iso-8859-1' maps every byte value, so decoding cannot fail.
        return pickle.load(fo, encoding='iso-8859-1')
def get_data(file):
    """Read a CIFAR-100 file and return its image data and fine labels.

    Args:
        file: Path handed to ``unpickle``.

    Returns:
        A ``(X, y)`` tuple with the ``'data'`` and ``'fine_labels'`` entries
        of the loaded mapping. ``.get`` is used, so a missing key yields
        ``None`` instead of raising.
    """
    batch = unpickle(file)
    return batch.get('data'), batch.get('fine_labels')
# Convert a pixel matrix into an image
def MatrixToImage(data):
    """Return a PIL image built from *data* after casting it to ``uint8``."""
    pixels = data.astype(np.uint8)
    return Image.fromarray(pixels)
# Convert to a greyscale image
def get_greydata(matrix):
    """Return *matrix* as an array after conversion to PIL mode ``'L'``."""
    grey_image = MatrixToImage(matrix).convert('L')
    return np.array(grey_image)
# Convert to a colour image
def get_colordata(matrix):
    """Round-trip *matrix* through a PIL image and return it as an array."""
    colour_image = MatrixToImage(matrix)
    return np.array(colour_image)
# Display a 3072-element vector as a picture
def Vector_pic(x):
    """Show one flattened 32x32 RGB image in a matplotlib window.

    Args:
        x: Array with 3072 values that can be reshaped to ``(3, 32, 32)``.
            Assumes the CIFAR layout (all red values, then green, then blue,
            each channel in row-major order).
    """
    # After the reshape the axes are (channel, row, col). Move the channel
    # axis last to obtain (row, col, channel), which is what imshow expects.
    # A plain ``.T`` would produce (col, row, channel) and draw the picture
    # mirrored across its main diagonal.
    matrix = get_colordata(x.reshape(3, 32, 32).transpose(1, 2, 0))
    # get_colordata already returns a uint8 array, so it can be shown directly.
    plt.imshow(matrix)
    plt.show()
# Load the data
X_train, y_train = get_data(file1) # author's note: 500 images per class, 100 classes (TODO confirm against the dataset)
# Convert the training labels to an ndarray so they can be indexed with a mask.
y_train = np.array(y_train)
# NOTE(review): y_test is kept exactly as get_data returns it (not converted to an ndarray).
X_test,y_test=get_data(file3)
# Part 2: 线性和余弦KPCA对比 (Linear vs. cosine KPCA comparison)
# Use the first 3 classes (labels 0, 1, 2) as the training subset - 1500 images
# Vectorized boolean mask: True where the label is one of the selected classes.
# This replaces a Python-level membership test per label with one NumPy call.
index = np.isin(y_train, np.arange(3))
X_train3 = X_train[index]
y_train3 = y_train[index]
from sklearn.decomposition import KernelPCA as KPCA
from sklearn.metrics import mean_squared_error # 均方误差
import time
import math
# Kernels to compare
kernels = ["linear", "cosine"]
# 15 different principal-component counts spread between 2^3 and 2^10
n_components = [round(math.pow(2, exponent)) for exponent in np.linspace(3, 10, 15)]
# Mean squared errors for the different component counts of a kernel
mse = []
# Start of the timing
time_start = time.time()
for kernel in kernels:
for i in n_components:
kpca = KPCA(n_components=int(i), kernel=kernel, fit_inverse_transf