Unsupervised Learning Models: Dimensionality Reduction
Author: Xie Zhong-zhao
1. PCA
import numpy as np
# Build a 2x2 matrix whose rows are linearly dependent (row 2 = 2 * row 1).
M = np.array([[1, 2],
              [2, 4]])
# Compute the rank of that linearly dependent matrix and print it.
rank_of_m = np.linalg.matrix_rank(M, tol=None)
print(rank_of_m)
import pandas as pd
# Read the training and test data sets with pandas. The files are read with
# header=None, so the columns get integer labels.
digit_train = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/optdigits/optdigits.tra',
    header=None,
)
digit_test = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/optdigits/optdigits.tes',
    header=None,
)
# Split the training data: columns 0..63 are the feature vector,
# column 64 is the label.
feature_columns = np.arange(64)
X_digits = digit_train[feature_columns]
y_digits = digit_train[64]
# Import PCA from sklearn.decomposition.
from sklearn.decomposition import PCA

# Set up a PCA that compresses the 64-dimensional feature vectors down to
# two dimensions, then fit it on the training features and project them.
estimator = PCA(n_components=2)
X_pca = estimator.fit_transform(X_digits)
print(X_pca)
print(X_pca[:, 0])

# Series.as_matrix() was removed in pandas 1.0 and raises AttributeError on
# current versions; .values returns the same ndarray on old and new pandas.
is_digit_one = y_digits.values == 1
# Boolean mask of the samples labelled 1 (wrapped in a list, as before).
print([is_digit_one])
# First principal component of only the samples labelled 1.
print(X_pca[:, 0][is_digit_one])
from matplotlib import pyplot as plt


def plot_pca_scatter():
    """Show the 2-D distribution of the 10 digit classes after PCA compression.

    Reads the module-level ``X_pca`` (the two-column PCA projection) and
    ``y_digits`` (the labels), draws one scatter series per digit 0-9 using a
    fixed colour for each digit, and displays the figure with ``plt.show()``.
    Takes no arguments and returns nothing.
    """
    # One colour per digit; the list position is the digit it represents.
    # NOTE(review): 'white' (digit 4) is likely invisible on a white axes
    # background -- confirm whether that is intended.
    colors = ['black', 'blue', 'purple', 'yellow', 'white',
              'red', 'lime', 'cyan', 'orange', 'gray']
    # Series.as_matrix() was removed in pandas 1.0; .values gives the same
    # ndarray on every pandas version. Hoisted so it is computed only once.
    labels = y_digits.values
    for digit, color in enumerate(colors):
        is_digit = labels == digit
        px = X_pca[:, 0][is_digit]
        py = X_pca[:, 1][is_digit]
        plt.scatter(px, py, c=color)
    # Legend entries '0'..'9' follow the order in which the series were drawn.
    plt.legend(np.arange(0, 10).astype(str))
    plt.xlabel('First Principal Component')
    plt.ylabel('Second Principal Component')
    plt.show()


plot_pca_scatter()
1
[[-12.44580352 4.71301334]
[-16.52024584 13.07467011]
[ 18.84671433 -10.71445017]
...,
[-24.11867087 -5.78704883]
[-29.35115567 3.41363267]
[ 24.993826 -11.79116904]]
[-12.44580352 -16.52024584 18.84671433 ..., -24.11867087 -29.35115567
24.993826 ]
[array([False, False, False, ..., False, False, False], dtype=bool)]
[ 1.03802829e+01 -1.04379656e+01 1.43618578e+01 1.16339451e+01
-3.30200846e+00 1.22332485e+01 9.20407222e+00 8.30492835e+00
8.56969878e+00 4.23669716e+00 6.60284625e+00 9.01959870e+00
1.15801627e+01 8.49019062e+00 -3.78521322e-01 -1.68102735e+01
-2.19399331e+00 1.40600870e+01 4.16898313e+00 2.94525323e+00
8.72491087e+00 6.37000716e+00 1.03965755e+01 1.06175330e+01
9.78565432e+00 7.63470042e+00 1.06497944e+01 8.14431578e+00
1.20032089e+01 8.37815835e+00 -9.58612285e-01 6.57085269e+00
6.34220091e-01 1.72254106e+00 -6.77930465e-01 -8.46183679e-01
-1.16357548e+00 3.52554139e+00 -8.76401983e+00 -1.92717564e+00
-6.87795966e+00 4.47138806e+00 6.03851916e-01 9.50989460e+00
4.68779834e+00 3.41906322e+00 1.31339429e+01 -5.67266416e+00
-2.58530877e+00 -6.35470508e+00 -5.23340480e+00 -2.33391071e+00
-1.11270494e+01 -2.21179220e-01 9.47692668e+00 5.01763456e+00
1.39901481e+01 7.96383394e+00 5.79810644e+00 1.14577890e+01
1.02713550e+01 -6.87254041e+00 -1.16995102e+01 -9.87992841e+00
-1.33394598e+01 -1.52933686e+01 -1.13868268e+01 -1.60171997e+01
-6.35680734e+00 -1.54948588e+01 2.49034817e+00 3.64840049e+00
-2.27528666e+00 -6.44265684e+00 6.31203415e+00 6.08488708e+00
5.12631116e+00 4.94814660e+00 1.56022297e+00 5.86349685e+00
7.92933353e+00 5.01878329e+00 5.93952581e+00 4.51645036e-01
3.91895540e+00 1.27727527e+01 1.85489607e+01 1.20872407e+01
1.02281540e+01 1.75347348e+01 1.94804961e+01 -5.40113023e+00
2.40162635e-01 -3.52528854e-01 1.05018301e+01 2.39317011e+00
3.77365620e+00 1.11763928e+01 1.40209995e+01 1.46306065e+01
1.29994591e+01 1.14743177e+01 -1.71281797e+01 7.23911978e+00
7.89591393e+00 7.65909505e+00 6.45541038e+00 8.70473702e+00
1.24854320e+01 -4.21198965e-01 1.28954391e+01 2.50478653e+00
1.79305349e+00 -1.22541996e+01 4.19760484e-01 -6.20773102e+00
1.05994014e+00 1.23992384e+01 9.76004438e-01 6.05600248e+00
5.52587831e-01 -4.80961444e+00 -2.63961568e+00 3.77742885e+00
2.34466989e+00 -6.82109440e+00 -9.38219051e-01 6.98226771e+00
2.19819315e+00 -5.14118720e+00 -2.21269059e-01 6.87760329e+00
-5.07990529e+00 6.07700374e+00 1.51443865e+00 -4.12570503e+00
1.17420745e+01 2.68636054e+00 8.43724576e-01 9.04821693e+00
5.69062465e+00 5.09391771e+00 2.38727241e-02 1.30291002e-01
-2.21696494e+00 1.62061822e+00 8.56541019e-01 -1.04122429e+01
5.31662513e+00 7.84844534e+00 -2.53040835e+00 -1.29182785e+01
-1.03348326e+01 6.82147467e-01 -5.43888083e+00 -1.63331640e+01
-1.00525923e+01 1.28068930e+01 1.33231200e+01 1.13147850e+01
8.91862856e+00 1.34008824e+00 -7.70197618e+00 1.28705348e+01
-4.80093992e+00 -1.06846372e+01 1.64133374e+00 -4.94311753e+00
1.06232067e+01 1.71831499e+00 6.82508338e+00 1.34554586e+01
9.39502862e+00 1.33799335e+01 8.08492152e+00 1.49620726e+01
7.17581227e+00 9.34415664e+00 9.33960828e+00 7.86356836e+00
2.14118801e+00 -2.13592073e+00 1.11653492e+01 -1.04491366e+00
-3.94541357e+00 4.56132149e+00 2.05559793e+00 1.61866475e+01
4.37683236e+00 -9.62052240e-02 -1.32389449e+00 1.27283361e+01
9.29013078e+00 1.41483410e+01 4.51357809e+00 2.59420973e+00
8.68599402e+00 -4.13381693e+00 -7.02643152e+00 6.71717387e+00
-6.84623064e+00 7.91850548e+00 4.08618847e+00 6.93902558e+00
6.33680507e+00 6.93103762e+00 -4.92035361e+00 1.07324064e+01
1.24797655e+01 1.30762501e+01 9.40172112e+00 1.35612664e+01
-3.93549392e+00 1.32420591e+01 4.73654349e+00 1.95419045e+00
4.41819042e+00 9.07426636e+00 -4.46064879e+00 7.36219691e+00
5.31371422e+00 -1.01825385e+01 -1.35445035e+01 1.15390490e+01
-1.04932638e+01 1.07819865e+01 8.77818200e+00 5.04039534e+00
1.36233502e+01 -1.06959043e+01 -1.20470341e+01 -6.57222515e+00
5.13528176e+00 -6.77706748e+00 -8.89440513e+00 -5.80165564e+00
-1.87681654e+00 4.56814347e+00 9.99379256e+00 3.15301675e-01
5.18439757e+00 3.71502472e+00 9.15767108e+00 8.84303133e+00
-6.96951400e-01 8.49256956e+00 1.01068733e+01 1.04815898e+01
-6.15395530e+00 1.42880924e+01 7.61468791e+00 -6.26532565e-02
-1.09296969e+00 3.37597493e+00 1.87111720e+00 1.16395328e+01
7.51366451e+00 3.78312476e+00 1.33321724e+01 5.62117699e+00
-9.75671342e+00 -1.88249908e+00 3.53846998e+00 4.41674517e+00
3.11406313e-01 6.77850208e+00 4.84468461e+00 9.93418932e+00
5.37455218e+00 1.16300889e+01 7.76200432e-01 -7.16517836e+00
7.30956796e+00 -4.03537992e+00 1.21840926e+01 1.34320669e+01
1.12880424e+01 7.34254268e+00 -8.53760892e+00 9.96101519e-01
1.11955136e+01 1.16191430e+01 8.51748445e+00 1.10262393e+01
2.67367744e+01 5.69619180e+00 5.99652302e+00 1.73695447e+01
-7.68083524e+00 1.32947737e+01 -1.19518330e+00 1.09855694e+01
-3.93538833e+00 1.04744223e+01 1.08595847e+01 1.25569475e+01
-1.01545515e+01 -1.39919828e+01 5.43785465e+00 1.01277835e+01
1.18250355e+01 -5.53136455e+00 5.74723755e-02 8.65924601e-02
1.77912205e+00 3.83230801e+00 1.16182396e+01 6.44742437e+00
9.69097559e+00 -1.02640487e+01 8.21685873e-01 -8.25560043e+00
-6.92170755e+00 -5.61494893e+00 -4.36365563e-01 7.32723893e+00
-6.56976697e+00 -1.30178755e+01 1.08404587e+01 3.45203125e+00
-4.68373413e+00 6.36076304e+00 2.92399045e+00 8.78764678e+00
2.16693344e+00 6.80021418e+00 -7.02931956e+00 1.27833562e+01
2.23739799e+01 6.26276605e+00 9.66522738e+00 3.38021206e+00
1.06152100e+01 7.41959756e+00 5.79736793e+00 9.06006772e+00
5.43146505e+00 -5.39811599e+00 9.48236309e+00 9.03057522e+00
9.67984449e+00 1.20660153e+01 8.14266430e+00 1.01896935e+01
1.18091326e+01 3.91897983e+00 1.05349565e+01 1.44086876e+01
-8.71950416e+00 4.18190750e+00 -8.67530064e-01 7.93708914e+00
8.70317666e+00 5.87257428e+00 7.08372547e+00 8.58931682e+00
1.25187431e+01 8.66464885e+00 7.36489793e+00 -1.27530341e+01
-8.35661199e+00 1.30629911e+01 1.08040910e+01 8.77241417e+00
9.50381783e+00 1.24992913e+01 -8.31521021e+00 2.00673606e+00
4.58585526e+00 3.16349264e+00 -1.10608458e+00 2.95561319e+00
7.34034207e+00 8.11673502e+00 7.27750323e+00 1.57382437e+01
1.06340794e+01 1.15884364e+01 9.62182749e+00 -1.13317133e+01
-7.87394324e+00 -1.97542950e+00 -2.80337867e+00 -5.70773108e+00
1.97880168e+01 8.16372094e+00 1.15713887e+01 8.96707599e+00
1.54352343e+01]
import pandas as pd
import numpy as np
# Read the training and test data sets with pandas (no header row in the files).
digit_train = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/optdigits/optdigits.tra',
    header=None,
)
digit_test = pd.read_csv(
    'https://archive.ics.uci.edu/ml/machine-learning-databases/optdigits/optdigits.tes',
    header=None,
)
# Split each data set into its feature vector (columns 0..63) and its
# label (column 64).
pixel_columns = np.arange(64)
X_train, y_train = digit_train[pixel_columns], digit_train[64]
X_test, y_test = digit_test[pixel_columns], digit_test[64]
# Import the linear-kernel support vector classifier, plus PCA.
from sklearn.decomposition import PCA
from sklearn.svm import LinearSVC

# Baseline: a default-configured LinearSVC trained on the original 64 pixel
# features; its predictions on the test data are kept in y_predict.
# (fit() returns the estimator itself, so construction and fitting can be chained.)
svc = LinearSVC().fit(X_train, y_train)
y_predict = svc.predict(X_test)

# Compress the 64-dimensional image data down to 20 dimensions with PCA.
estimator = PCA(n_components=20)
# The 20 orthogonal directions are determined (fit) from the training
# features, which are transformed in the same step...
pca_X_train = estimator.fit_transform(X_train)
# ...and the test features are transformed along those same 20 directions.
pca_X_test = estimator.transform(X_test)

# Same default LinearSVC, this time trained on the compressed 20-dimensional
# features; its predictions on the test data are kept in pca_y_predict.
pca_svc = LinearSVC().fit(pca_X_train, y_train)
pca_y_predict = pca_svc.predict(pca_X_test)
# Compare recognition performance of the identically configured support
# vector classifier on the original pixel features versus the PCA-compressed
# low-dimensional features.

# classification_report gives a finer-grained, per-class performance analysis.
from sklearn.metrics import classification_report

# Class names '0'..'9' used as row labels in both reports.
digit_names = list(np.arange(10).astype(str))

# (model, test features it expects, predictions it already produced):
# first the classifier trained on raw pixels, then the one trained on the
# PCA-compressed features.
evaluations = (
    (svc, X_test, y_predict),
    (pca_svc, pca_X_test, pca_y_predict),
)
for model, features, predictions in evaluations:
    # Overall accuracy on the test set, followed by the per-class report.
    print(model.score(features, y_test))
    print(classification_report(y_test, predictions, target_names=digit_names))
0.920979410128
precision recall f1-score support
0 0.99 0.98 0.99 178
1 0.89 0.89 0.89 182
2 0.99 0.96 0.97 177
3 0.99 0.84 0.91 183
4 0.95 0.97 0.96 181
5 0.89 0.96 0.92 182
6 0.99 0.96 0.98 181
7 0.99 0.90 0.94 179
8 0.72 0.91 0.80 174
9 0.87 0.84 0.86 180
avg / total 0.93 0.92 0.92 1797
0.927657206455
precision recall f1-score support
0 0.96 0.96 0.96 178
1 0.93 0.85 0.89 182
2 0.97 0.95 0.96 177
3 0.96 0.91 0.93 183
4 0.94 0.97 0.95 181
5 0.90 0.97 0.93 182
6 0.97 0.98 0.98 181
7 0.98 0.92 0.95 179
8 0.89 0.86 0.87 174
9 0.81 0.91 0.86 180
avg / total 0.93 0.93 0.93 1797