参考这个视频学习:https://www.bilibili.com/video/av54019270/?p=1
lfw 数据集可能下载不成功,可以用我下载好的这个:https://download.csdn.net/download/weixin_40893939/11276022 ,下载后放到下载失败的那个目录下替换即可。就说这么多吧……这要是都弄不好,那就别玩了。
关键词:
1 rbf 核、gamma 和 C 参数:gamma 值对应模型的复杂程度,gamma 越大对训练集的分类越准,但泛化能力越差。C 参数越大,对误分类的惩罚越重,模型越容易过拟合,泛化能力越低。
2 sklearn内置lfw人脸数据集
3 pca降维
4 GridSearchCV网格搜索交叉验证选择最佳参数
5 混淆矩阵
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns; sns.set()
from sklearn.datasets.samples_generator import make_blobs
from sklearn.svm import SVC
from sklearn.decomposition import PCA
from sklearn.pipeline import make_pipeline
from sklearn.datasets.samples_generator import make_circles
from sklearn.model_selection import train_test_split
def plot_svc_decision_function(model, ax=None, plot_support=True):
    """Draw the decision boundary and margins of a fitted two-feature SVC.

    Args:
        model: Fitted classifier exposing ``decision_function`` and
            ``support_vectors_`` (for example ``sklearn.svm.SVC``).
        ax: Matplotlib axes to draw on. Defaults to the current axes.
        plot_support: When true, circle the model's support vectors.
    """
    if ax is None:
        ax = plt.gca()

    # Remember the current view so it can be restored after drawing.
    xlim = ax.get_xlim()
    ylim = ax.get_ylim()

    # Evaluate the decision function on a 30x30 grid spanning the view.
    x = np.linspace(xlim[0], xlim[1], 30)
    y = np.linspace(ylim[0], ylim[1], 30)
    Y, X = np.meshgrid(y, x)
    xy = np.vstack([X.ravel(), Y.ravel()]).T
    P = model.decision_function(xy).reshape(X.shape)

    # Level 0 is the decision boundary (solid); levels -1 and +1 are the
    # margins (dashed), so the boundary can be told apart from the margins.
    ax.contour(X, Y, P, colors='k',
               levels=[-1, 0, 1], alpha=0.5,
               linestyles=['--', '-', '--'])

    if plot_support:
        # With facecolors='none' the edge colour must be given explicitly:
        # matplotlib's default edge colour is 'face', which would make the
        # hollow markers invisible.
        ax.scatter(model.support_vectors_[:, 0],
                   model.support_vectors_[:, 1],
                   s=300, linewidth=1, facecolors='none', edgecolors='k')

    ax.set_xlim(xlim)
    ax.set_ylim(ylim)
def linear():
    """Fit a linear-kernel SVC on two generated blobs and plot its margins."""
    points, labels = make_blobs(n_samples=50, centers=2,
                                random_state=0, cluster_std=0.6)

    classifier = SVC(kernel='linear')
    classifier.fit(points, labels)

    # Colour each sample by its class, then overlay the decision function.
    first_feature, second_feature = points[:, 0], points[:, 1]
    plt.scatter(first_feature, second_feature, c=labels, s=50, cmap='autumn')
    plot_svc_decision_function(classifier)
    plt.show()
def kernel():
    """Fit an RBF-kernel SVC on concentric circles and plot the result.

    The data is not linearly separable, so an RBF kernel is used; the very
    large ``C`` (1E6) makes the margin close to a hard margin.
    """
    x, y = make_circles(1000, factor=.1, noise=.1)
    clf = SVC(kernel='rbf', C=1E6)
    clf.fit(x, y)

    plt.scatter(x[:, 0], x[:, 1], c=y, s=50, cmap='autumn')
    plot_svc_decision_function(clf)

    # Mark the support vectors with small hollow markers. An explicit edge
    # colour is required: with facecolors='none' matplotlib's default edge
    # colour ('face') would leave the markers invisible.
    plt.scatter(clf.support_vectors_[:, 0], clf.support_vectors_[:, 1],
                s=3, lw=1, facecolors='none', edgecolors='k')
    plt.show()
def face():
    """Train and evaluate a PCA + RBF-SVC face classifier on the LFW dataset.

    Steps: fetch the Labeled Faces in the Wild people subset, preview some
    faces, grid-search ``C`` and ``gamma`` for a PCA -> SVC pipeline with
    cross-validation, then show predictions, a classification report and a
    confusion-matrix heat map.
    """
    from sklearn.datasets import fetch_lfw_people
    from sklearn.metrics import classification_report, confusion_matrix
    from sklearn.model_selection import GridSearchCV

    faces = fetch_lfw_people(min_faces_per_person=60)
    print(faces.target_names)
    print(faces.images.shape)
    # Per-image (height, width); used to turn flattened rows back into images
    # instead of hard-coding the size produced by the default resize factor.
    image_shape = faces.images.shape[1:]

    # Preview the first 15 faces with their true names.
    fig, ax = plt.subplots(3, 5)
    for i, axi in enumerate(ax.flat):
        axi.imshow(faces.images[i], cmap='bone')
        axi.set(xticks=[], yticks=[],
                xlabel=faces.target_names[faces.target[i]])
    plt.show()

    # Reduce each image to 150 whitened principal components before the SVC.
    pca = PCA(n_components=150, whiten=True, random_state=42)
    svc = SVC(kernel='rbf', class_weight='balanced')
    # make_pipeline is a shorthand for Pipeline: pass the step instances and
    # the step names ('pca', 'svc') are generated automatically.
    pipeline = make_pipeline(pca, svc)

    xtrain, xtest, ytrain, ytest = train_test_split(
        faces.data, faces.target, random_state=40)

    # The 'svc__' prefix routes each parameter to the pipeline's SVC step.
    param_grid = {'svc__C': [1, 5, 10],
                  'svc__gamma': [0.0001, 0.0005, 0.001]}
    grid = GridSearchCV(pipeline, param_grid)
    grid_result = grid.fit(xtrain, ytrain)
    print(grid_result.best_params_)

    best_model = grid.best_estimator_
    yfit = best_model.predict(xtest)
    print(yfit.shape)

    # Show 24 test faces labelled with the predicted surname; wrong
    # predictions are labelled in red.
    fig, ax = plt.subplots(4, 6)
    for i, axi in enumerate(ax.flat):
        axi.imshow(xtest[i].reshape(image_shape), cmap='bone')
        axi.set(xticks=[], yticks=[])
        axi.set_ylabel(faces.target_names[yfit[i]].split()[-1],
                       color='black' if yfit[i] == ytest[i] else 'red')
    fig.suptitle('Predicted Names:Incorrect Labels in Red', size=14)
    plt.show()

    print(classification_report(ytest, yfit, target_names=faces.target_names))

    # Transposed so that rows are predicted labels and columns true labels,
    # matching the axis labels below.
    mat = confusion_matrix(ytest, yfit)
    sns.heatmap(mat.T, square=True, annot=True, fmt='d', cbar=False,
                xticklabels=faces.target_names,
                yticklabels=faces.target_names)
    plt.xlabel('true label')
    plt.ylabel('predicted label')
    plt.show()
# Run the three demos in sequence: linear SVM, RBF-kernel SVM, face recognition.
for run_demo in (linear, kernel, face):
    run_demo()