"""用pca降维后一个人脸识别的实列""" from sklearn.datasets import fetch_olivetti_faces import time import numpy as np import logging from matplotlib import pyplot as plt from sklearn.model_selection import train_test_split from sklearn.decomposition import PCA from sklearn.model_selection import GridSearchCV from sklearn.svm import SVC # 读取数据 logging.basicConfig(level=logging.INFO, format='%(asctime)s %(message)s') data_home = 'datasets/' logging.info('Start to load dataset') faces = fetch_olivetti_faces(data_home=data_home) logging.info('Done with load dataset') X = faces.data y = faces.target """将数据画出来""" # targets = np.unique(faces.target) # targets_names = np.array(["c%d" % t for t in targets]) # n_targets = targets_names.shape[0] # n_samples, h, w = faces.images.shape # print(n_samples) # print(n_targets) # print(w, h, X.shape) # # # def plot_gallery(images, titles, h, w, n_row=2, n_col=5): # """显示图片阵列""" # plt.figure(figsize=(2 * n_col, 2.2 * n_row), dpi=144) # plt.subplots_adjust(bottom=0, left=0.01, right=.99, top=.90, hspace=.01) # for i in range(n_row * n_col): # plt.subplot(n_row, n_col, i+1) # plt.imshow(images[i].reshape((h, w)), cmap=plt.cm.gray) # # plt.title(titles[i]) # plt.axis('off') # plt.show() # # # n_row = 2 # n_col = 6 # # sample_images = None # sample_titles = [] # # for i in range(n_targets): # people_images = X[y == i] # people_sample_index = np.random.randint(0, people_images.shape[0], 1) # people_sample_image = people_images[people_sample_index, :] # # if sample_images is not None: # sample_images = np.concatenate((sample_images, people_sample_image), axis=0) # else: # sample_images = people_sample_image # sample_titles.append(targets_names[i]) # plot_gallery(sample_images, sample_titles, h, w, n_row, n_col) X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4) """找出k个特征属性,并呼画出数据还原率和k个属性之间的关系""" # print("Exploring explained variance ratio for dataset") # candidate_components = range(10, 300, 30) # explained_ratios = 
[] # start = time.perf_counter() # for c in candidate_components: # pca = PCA(n_components=c) # X_pca = pca.fit_transform(X) # explained_ratios.append(np.sum(pca.explained_variance_ratio_)) # print('Done in {0:.2f}s'.format(time.perf_counter() - start)) # 画出k个属性和数据还原率的图像 # plt.figure(figsize=(10, 6), dpi=144) # plt.grid() # plt.plot(candidate_components, explained_ratios) # plt.xlabel('Number of PCA Components') # plt.ylabel('Explained Variance Ratio') # plt.title('Explained variance ratio for PCA') # plt.yticks(np.arange(0.5, 1.05, .05)) # plt.xticks(np.arange(0, 300, 20)) # plt.show() # 画出用不同的k个属性进行降维后的图像和原图的对比 # n_row = 5 # n_col = 1 # # sample_images = sample_images[0:5] # sample_titles = sample_titles[0:5] # # plotting_images = sample_images # plotting_titles = sample_titles # candidate_components = [140, 75, 37, 19, 8] # for c in candidate_components: # print("Fitting and projecting on PCA(n_components={}) ...".format(c)) # start = time.perf_counter() # pca = PCA(n_components=c) # pca.fit(X) # X_sample_pca = pca.transform(sample_images) # X_sample_inv = pca.inverse_transform(X_sample_pca) # plotting_images = np.concatenate((plotting_images, X_sample_inv), axis=0) # # sample_title_pca = sample_titles # plotting_titles = np.concatenate((plotting_titles, sample_titles), axis=0) # print("Done in {0:.2f}s".format(time.perf_counter()-start)) # # # print("Plotting sample image with different number of PCA components ...") # plot_gallery(plotting_images, plotting_titles, h, w, 6, n_row) # 进行特征pca降维,其中选择的特征个数为140 n_component = 140 print("Fitting PCA by using train data...") start = time.perf_counter() pca = PCA(n_components=n_component, svd_solver='randomized', whiten=True).fit(X_train) print('Done in {0:.2f}s'.format(time.perf_counter() - start)) X_train_pca = pca.transform(X_train) X_test_pca = pca.transform(X_test) # 找出利用SVM算法的最佳系数 print('Searching the best parameters for SVC...') param_grid = {'C': [1, 5, 10, 50, 100], 'gamma': [0.0001, 0.0005, 0.001, 0.005, 
0.01]} clf = GridSearchCV(SVC(kernel='rbf', class_weight='balanced'), param_grid, verbose=2, n_jobs=4) clf.fit(X_train_pca, y_train) print("Best parameters found by grid search: %s" % clf.best_params_) # 算出模型的准确率 print("Predict test dataset...") test_score = clf.score(X_test_pca, y_test) print(test_score)
sklearn — PCA (principal component analysis)
最新推荐文章于 2019-07-21 16:29:13 发布
该博客通过使用sklearn库中的PCA进行人脸识别数据集的降维。首先展示了数据加载和预处理过程,然后通过PCA找出最佳的特征数量,以最大化数据的解释方差。接着,比较了不同PCA组件数量下原始图像和降维后的图像效果。最后,使用SVM进行分类,并通过GridSearchCV找到最佳参数,评估了模型在测试集上的准确性。
摘要由CSDN通过智能技术生成