# CIFAR-10 dataset: 60000 color images of size 32*32 in 10 classes (6000 per class);
# 50000 training images and 10000 test images. This script uses a 5-class subset.
import scipy.io

# Load the training set from a MATLAB .mat file.
# Each image is a flattened 32*32*3 = 3072-dimensional vector.
# Approx. memory: 3072 * 4 bytes * ~9968 samples ≈ 116 MB (assuming 4-byte ints).
train_data = scipy.io.loadmat("F:\\模式识别\\最小错误率的贝叶斯决策进行图像分类\\data\\train_data.mat")
print(type(train_data))
print(train_data.keys())
print(train_data.values())
print(len(train_data['Data']))
print(len(train_data['Data'][0]))
print(train_data)

# x: training feature matrix, y: training labels.
x = train_data['Data']
y = train_data['Label']
print(y)
print(len(y))
print(y.shape)
print(y.flatten().shape)
# labels_name: 5 label names — airplane, automobile, bird, cat, deer.
import scipy.io

# Load the label-name table (5 class names for this subset).
labels_name = scipy.io.loadmat("F:\\模式识别\\最小错误率的贝叶斯决策进行图像分类\\data\\labels_name.mat")
print(type(labels_name))
print(labels_name)
print(len(labels_name))
# test_data: 5000 images total, 5 classes, 1000 images per class.
import scipy.io

# Load the test set (5000 images, 5 classes, 1000 per class).
test_data = scipy.io.loadmat("F:\\模式识别\\最小错误率的贝叶斯决策进行图像分类\\data\\test_data.mat")
print(test_data['Label'])
print(test_data['Data'])
print(len(test_data['Label']))

datatest = test_data['Data']
labeltest = test_data['Label']
print(datatest.shape)
print(labeltest.shape)

# Indices of test samples whose label is 1, 2 or 3.
# NOTE(review): test_index is never used afterwards — the code below simply takes
# the first 3000 labels, which presumably correspond to classes 1-3 in this
# dataset layout. TODO confirm the .mat file is ordered by class.
test_index = []
for i in range(len(labeltest)):
    if labeltest[i] == 1:
        test_index.append(i)
    elif labeltest[i] == 2:
        test_index.append(i)
    elif labeltest[i] == 3:
        test_index.append(i)

# Keep only the first 3000 test labels (the three classes being classified).
labeltest = test_data['Label'][:3000]
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.decomposition import PCA

print(x)
print(x.shape)
print(type(x))

# PCA: keep the smallest number of components whose cumulative explained
# variance ratio reaches 79% (float n_components selects by variance fraction).
pca = PCA(n_components=0.79)
pca.fit(x)                     # fit the model on the training features
x_new = pca.transform(x)       # project training data into the reduced space
print("降维后各主成分的累计方差贡献率:", pca.explained_variance_ratio_)
print("降维后主成分的个数:", pca.n_components_)
print(x_new)
# Collect the training-sample indices of each of the three classes.
index_1 = []
index_2 = []
index_3 = []
for i in range(len(y)):
    if y[i] == 1:
        index_1.append(i)
    elif y[i] == 2:
        index_2.append(i)
    elif y[i] == 3:
        index_3.append(i)

# Per-class sample counts (used later for the ML parameter estimates and priors).
index_num = [len(index_1), len(index_2), len(index_3)]
print(len(index_1))
print(len(index_2))
print(len(index_3))
print(index_num)

import numpy as np
# Gather the PCA-reduced feature vectors of each class.
class1_feature = [x_new[i] for i in index_1]
print(len(class1_feature))
class2_feature = [x_new[i] for i in index_2]
print(len(class2_feature))
class3_feature = [x_new[i] for i in index_3]
print(len(class3_feature))
# Next: estimate the class-conditional density parameters for class 1.
# Class 1: maximum-likelihood estimates of the Gaussian parameters.
# miu1  — per-dimension mean vector.
# sigma1 — covariance matrix of the mean-centered features.
class1_feature = np.mat(class1_feature)
print(class1_feature.shape)
miu1 = []
# Iterate over the actual number of PCA components instead of a hard-coded 30:
# PCA(n_components=0.79) chooses the dimensionality at fit time.
for i in range(class1_feature.shape[1]):
    miu = class1_feature[:, i].sum() / len(index_1)
    miu1.append(miu)
    # Center the column in place so the covariance below is E[(x-mu)(x-mu)^T].
    class1_feature[:, i] = class1_feature[:, i] - miu
sigma1 = (class1_feature.T * class1_feature) / len(index_1)
print(miu1)
print(sigma1)
print(sigma1.shape)
# Next: estimate the class-conditional density parameters for class 2.
# Class 2: maximum-likelihood estimates of the Gaussian parameters
# (same procedure as class 1: per-dimension mean, then covariance of the
# mean-centered feature matrix).
class2_feature = np.mat(class2_feature)
miu2 = []
# Use the actual PCA dimensionality rather than a hard-coded 30.
for i in range(class2_feature.shape[1]):
    miu = class2_feature[:, i].sum() / len(index_2)
    miu2.append(miu)
    class2_feature[:, i] = class2_feature[:, i] - miu
sigma2 = (class2_feature.T * class2_feature) / len(index_2)
print(miu2)
print(sigma2)
print(sigma2.shape)
# Next: estimate the class-conditional density parameters for class 3.
# Class 3: maximum-likelihood estimates of the Gaussian parameters.
class3_feature = np.mat(class3_feature)
miu3 = []
# Use the actual PCA dimensionality rather than a hard-coded 30.
for i in range(class3_feature.shape[1]):
    miu = class3_feature[:, i].sum() / len(index_3)
    miu3.append(miu)
    class3_feature[:, i] = class3_feature[:, i] - miu
sigma3 = (class3_feature.T * class3_feature) / len(index_3)
print(miu3)
print(sigma3)
print(sigma3.shape)
# Next: compute the prior probability of each of the three classes.
# Prior probability of each class = class sample count / total sample count.
prior_index1 = len(index_1) / len(y)
prior_index2 = len(index_2) / len(y)
prior_index3 = len(index_3) / len(y)
print(prior_index1)
print(prior_index2)
print(prior_index3)

import math
# Next: project the test data into the PCA space.
# Project the test data with the PCA model fitted on the training data.
x_test = pca.transform(datatest)
print(x_test)
print(x_test.shape)
print(x_test[0])

# Minimum-error-rate Bayes decision with Gaussian class-conditional densities.
# Discriminant (log-posterior up to a constant):
#   g_k(x) = -1/2 (x-mu_k)^T Sigma_k^{-1} (x-mu_k) - 1/2 ln|Sigma_k| + ln P(w_k)
# The predicted class is argmax_k g_k(x).
predict_label = []
for i in range(3000):
    d1 = np.mat(x_test[i] - miu1)
    d2 = np.mat(x_test[i] - miu2)
    d3 = np.mat(x_test[i] - miu3)
    g1 = -0.5 * (d1 * sigma1.I * d1.T)[0, 0] - 0.5 * math.log(np.linalg.det(sigma1)) + math.log(prior_index1)
    g2 = -0.5 * (d2 * sigma2.I * d2.T)[0, 0] - 0.5 * math.log(np.linalg.det(sigma2)) + math.log(prior_index2)
    g3 = -0.5 * (d3 * sigma3.I * d3.T)[0, 0] - 0.5 * math.log(np.linalg.det(sigma3)) + math.log(prior_index3)
    # argmax over the three discriminants ('best' avoids shadowing builtin max).
    if g1 > g2:
        best = 1 if g1 > g3 else 3
    else:
        best = 2 if g2 > g3 else 3
    predict_label.append(best)

from sklearn.metrics import accuracy_score
print(accuracy_score(predict_label, labeltest))