03--机器学习之SVM(暂定)

12 篇文章 0 订阅
12 篇文章 0 订阅
from __future__ import print_function
from time import time  #某些步骤需要计时,要用到这个模块
import logging  #打印程序进展方面的信息的模块
import matplotlib.pyplot as plt #在程序最后把识别出的人脸绘制出来,看一下预测与实际的是否符合,要用到这个木块

from sklearn.cross_validation import train_test_split
from sklearn.datasets import fetch_lfw_people
from sklearn.grid_search import GridSearchCV
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix
from sklearn.decomposition import RandomizedPCA
from sklearn.svm import SVC

# Print the module docstring as a banner, but only when one exists; an
# unconditional print would emit the literal text "None" for a module
# without a docstring.
if __doc__:
    print(__doc__)

# Emit INFO-level progress messages, each prefixed with a timestamp.
# The space between the two fields keeps the timestamp from running
# straight into the message text.
logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s')

#################################################################################
# Download the data, if not already on disk, and load it as numpy arrays.
# The face-dataset loader is called with min_faces_per_person=70 and resize=0.4.
lfw_people = fetch_lfw_people(min_faces_per_person=70, resize=0.4)

# Introspect the images array to find the shapes (h and w are needed later
# for plotting): first axis is the number of samples, then height and width.
images_shape = lfw_people.images.shape
n_samples = images_shape[0]
h, w = images_shape[1:]

# The label to predict is the id of the person; target_names holds the
# names of the people to tell apart.
y = lfw_people.target
target_names = lfw_people.target_names
n_classes = len(target_names)

# For machine learning we use the data matrix directly (relative pixel
# position info is ignored by this model): one row per sample, one column
# per feature.
X = lfw_people.data
n_features = X.shape[1]

print("Total dataset size:")
print("n_samples:%d" % n_samples)    # number of samples
print("n_features:%d" % n_features)  # dimensionality of a feature vector
print("n_classes:%d" % n_classes)    # number of distinct people

################################################################################
# Split into a training set and a test set (a quarter of the samples is
# held out for testing). X_train / X_test are matrices, y_train / y_test
# the matching label vectors.
# NOTE(review): no random_state is passed, so the partition presumably
# differs from run to run -- confirm whether reproducibility is wanted.
split_parts = train_test_split(X, y, test_size=0.25)
X_train, X_test, y_train, y_test = split_parts

################################################################################
# Compute a PCA (eigenfaces) on the face dataset (treated as unlabeled
# dataset): unsupervised feature extraction / dimensionality reduction
n_components = 150  # number of principal components to keep

n_train_faces = X_train.shape[0]
print("Extracting the top %d eigenfaces from %d faces" % (n_components, n_train_faces))
t0 = time()
# NOTE(review): RandomizedPCA appears to have been removed from recent
# scikit-learn releases -- confirm against the installed version.
pca = RandomizedPCA(n_components=n_components, whiten=True)
pca.fit(X_train)  # the projection is learned from the training features only
elapsed = time() - t0
print("Done in %0.3fs" % elapsed)

# View every principal component as an h x w picture so it can be plotted.
eigenfaces = pca.components_.reshape(n_components, h, w)

print("Prohecting the input data on the eigenfaces orthonormal basis")
t0 = time()
# Map both partitions from the original feature space to the reduced one.
X_train_pca = pca.transform(X_train)
X_test_pca = pca.transform(X_test)
elapsed = time() - t0
print("done in %0.3fs" % elapsed)


##############################################################################
# Train a SVM classification model

print("Fitting the classifier to the training set")
t0 = time()
# Candidate hyper-parameter values; the grid search tries the combinations
# and keeps the one that scores best.
param_grid = {
    'C': [1e3, 5e3, 1e4, 5e4, 1e5],
    'gamma': [0.0001, 0.0005, 0.001, 0.005, 0.01, 0.1],
}
clf = GridSearchCV(SVC(kernel='rbf', class_weight='auto'), param_grid)
# Fit on the PCA-projected *training* features. Passing the test features
# here would pair them with y_train (a different number of samples) and
# would leak the evaluation data into training.
clf = clf.fit(X_train_pca, y_train)
print("done in %0.3fs" % (time() - t0))
print("Best estimator found by grid search:")
print(clf.best_estimator_)

###############################################################################
# Quantitative evaluation of the model quality on the test set

print("Predicting people's names on the test set")
t0 = time()
y_pred = clf.predict(X_test_pca)  # predict labels for the projected test samples
elapsed = time() - t0
print("done in %0.3fs" % elapsed)

# Compare the predicted labels with the true ones: first a per-class
# report, then the confusion matrix over all class ids.
report = classification_report(y_test, y_pred, target_names=target_names)
print(report)
class_ids = range(n_classes)
matrix = confusion_matrix(y_test, y_pred, labels=class_ids)
print(matrix)


################################################################################
#Qualitative evaluation of the predictions using matplotlib

def plot_gallery(images, titles, h, w, n_row=3, n_col=4):
    """Plot a gallery of portraits on an ``n_row`` x ``n_col`` grid.

    Args:
        images: Indexable collection of images; each entry is reshaped to
            ``(h, w)`` before being drawn.
        titles: Indexable collection of captions, ``titles[i]`` belonging
            to ``images[i]``.
        h: Image height used for the reshape.
        w: Image width used for the reshape.
        n_row: Number of grid rows.
        n_col: Number of grid columns.

    At most ``n_row * n_col`` panels are drawn. When fewer images or titles
    are supplied, the remaining grid cells are simply left empty instead of
    failing with an IndexError.
    """
    plt.figure(figsize=(1.8 * n_col, 2.4 * n_row))
    plt.subplots_adjust(bottom=0, left=.01, right=.99, top=.90, hspace=.35)
    # Never index past the end of either input sequence.
    n_panels = min(n_row * n_col, len(images), len(titles))
    for i in range(n_panels):
        plt.subplot(n_row, n_col, i + 1)
        plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray)
        plt.title(titles[i], size=12)
        # Hide the axis ticks on both axes.
        plt.xticks(())
        plt.yticks(())

#plot the result of the prediction on a portion of the test set

def title(y_pred, y_test, target_names, i):
    """Build the two-line caption for sample ``i``.

    Args:
        y_pred: Indexable sequence of predicted class ids.
        y_test: Indexable sequence of true class ids.
        target_names: Indexable sequence mapping a class id to a name string.
        i: Index of the sample to caption.

    Returns:
        A string with the predicted name on the first line and the true
        name on the second. Only the last space-separated word of each
        name is kept.
    """
    # Split once from the right on a space and keep the final word. An
    # empty separator is rejected by str.rsplit with ValueError.
    pred_name = target_names[y_pred[i]].rsplit(' ', 1)[-1]
    true_name = target_names[y_test[i]].rsplit(' ', 1)[-1]
    return 'predicted:%s\ntrue:    %s' % (pred_name, true_name)

# Caption every test sample via title() and draw the gallery of test
# portraits with those captions.
n_predictions = y_pred.shape[0]
prediction_titles = [
    title(y_pred, y_test, target_names, idx) for idx in range(n_predictions)
]
plot_gallery(X_test, prediction_titles, h, w)

# Plot the gallery of the most significant eigenfaces, numbered from 0.
n_eigenfaces = eigenfaces.shape[0]
eigenfaces_titles = ["eigenface %d" % idx for idx in range(n_eigenfaces)]
plot_gallery(eigenfaces, eigenfaces_titles, h, w)

# Display both figures.
plt.show()

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值