# For the math behind maximizing the SVM margin, see:
# https://jingyan.baidu.com/article/72ee561a91f013a06038df63.html
import numpy as np
import pandas as pd
import scipy.io as sio
import matplotlib.pyplot as plt
from sklearn import svm
from sklearn import metrics
import seaborn as sns
# --- Dataset 1: load and visualize the two classes ----------------------
mat = sio.loadmat("ex6data1.mat")
print(mat.keys())
data = pd.DataFrame(mat.get('X'), columns=['X1', 'X2'])
data['y'] = mat.get('y')
print(data)
# In scatter, `c` supplies the per-point value used to distinguish points
# and `cmap` maps those values to contrasting colors.
fig, axes = plt.subplots(figsize=(12, 8))
axes.scatter(data['X1'], data['X2'], c=data['y'], cmap='coolwarm')
axes.set_xlabel('X1')
axes.set_ylabel('X2')
plt.show()
# --- Dataset 1: linear SVM, small C vs. large C -------------------------
# C scales the penalty on margin violations in the cost function; 'hinge'
# is the standard SVM loss.
# NOTE(review): the original trained with C=100 for the plot labeled "c=1"
# and C=1 for the plot labeled "c=100" — the C values are swapped here so
# each plot matches its label (C=1 tolerates the outlier, C=100 fits it).
svcl = svm.LinearSVC(C=1, loss='hinge')
# fit() trains the SVM; afterwards the learned parameters are used to predict.
svcl.fit(data[['X1', 'X2']], data['y'])
# Mean accuracy of the trained model on the training set (higher is better).
print(svcl.score(data[['X1', 'X2']], data['y']))
# decision_function returns each sample's signed distance to the separating
# hyperplane: the sign says which side, the magnitude how far.
data['distance c1'] = svcl.decision_function(data[['X1', 'X2']])
plt.scatter(data['X1'], data['X2'], c=data['distance c1'], cmap='coolwarm')
plt.title('SVM linear when c=1')
plt.show()
# Repeat with a much larger C: the margin shrinks to classify the outlier.
svcl = svm.LinearSVC(C=100, loss='hinge')
svcl.fit(data[['X1', 'X2']], data['y'])
print(svcl.score(data[['X1', 'X2']], data['y']))
data['distance c100'] = svcl.decision_function(data[['X1', 'X2']])
plt.scatter(data['X1'], data['X2'], c=data['distance c100'], cmap='coolwarm')
plt.title('SVM linear when c=100')
plt.show()
# sklearn already ships a Gaussian (RBF) kernel, but writing it by hand
# is instructive for learning.
def gaussian_kernel(x1, x2, reg):
    """Gaussian (RBF) similarity: exp(-||x1 - x2||^2 / (2 * reg^2)).

    x1, x2 are array-likes of equal shape; reg is the kernel bandwidth.
    Returns a value in (0, 1], approaching 1 as x1 and x2 coincide.
    """
    diff = x1 - x2
    return np.exp(-(diff * diff).sum() / (2.0 * reg ** 2))
# Sanity-check the hand-written Gaussian kernel on known inputs
# (expected output: exp(-9/8) ~= 0.32465).
X1 = np.array([1, 2, 1])
X2 = np.array([0, 4, -1])
reg = 2
print(gaussian_kernel(X1, X2, reg))
# --- Dataset 2: nonlinear boundary via sklearn's built-in RBF kernel ----
mat = sio.loadmat('ex6data2.mat')
data = pd.DataFrame(mat.get('X'), columns=['X1', 'X2'])
data['y'] = mat.get('y')
print(data)
plt.scatter(data['X1'], data['X2'], c=data['y'], cmap='Dark2')
plt.xlabel('X1')
plt.ylabel('X2')
plt.title('Dataset 2')  # fix: original title was the typo 'titile'
plt.show()
# For this exercise we call sklearn's Gaussian-kernel SVM directly
# (kernel='rbf'); probability=True enables probability estimates.
svc = svm.SVC(C=100, kernel='rbf', gamma=10, probability=True)
svc.fit(data[['X1', 'X2']], data['y'])
print(svc.score(data[['X1', 'X2']], data['y']))
# predict_proba returns, per sample, the probability of belonging to each
# class; column 0 is the probability of class 0.
predict = svc.predict_proba(data[['X1', 'X2']])[:, 0]
print(predict)
plt.scatter(data['X1'], data['X2'], c=predict, cmap='Reds')
plt.show()
print(svc.predict_proba([[0.8, 0.6]]))
# --- Dataset 3: grid-search C and gamma on a validation set -------------
mat = sio.loadmat("ex6data3.mat")
print(mat.keys())
data = pd.DataFrame(mat.get('X'), columns=['X1', 'X2'])
data['y'] = mat.get('y')
cv = pd.DataFrame(mat.get('Xval'), columns=['X1', 'X2'])
cv['y'] = mat.get('yval')
# We want to find which (C, gamma) pair gives the best-performing SVM:
# train on the training set, score on the held-out validation set.
candidate = [0.01, 0.03, 0.1, 0.3, 1, 3, 10, 30]
combination = [(c, gamma) for c in candidate for gamma in candidate]
search = []
for c, gamma in combination:
    svc = svm.SVC(C=c, gamma=gamma)
    svc.fit(data[['X1', 'X2']], data['y'])
    search.append(svc.score(cv[['X1', 'X2']], cv['y']))
best_score = search[np.argmax(search)]
best_parameter = combination[np.argmax(search)]
# Fix: refit with the parameters the search actually found (the original
# hard-coded C=3, gamma=30), and predict on both feature columns (the
# original indexed ['X1', 'X1'], feeding the first column twice).
best_svc = svm.SVC(C=best_parameter[0], gamma=best_parameter[1])
best_svc.fit(data[['X1', 'X2']], data['y'])
ypred = best_svc.predict(cv[['X1', 'X2']])
# How far do the SVM's predictions deviate from the true validation labels?
print(metrics.classification_report(cv['y'], ypred))
# --- Spam classification ------------------------------------------------
spam_train = sio.loadmat("spamTrain.mat")
spam_test = sio.loadmat("spamTest.mat")
X = spam_train['X']
# sklearn expects 1-D label arrays, so flatten the (m, 1) labels with ravel().
y = spam_train['y'].ravel()
Xval = spam_test['Xtest']
yval = spam_test['ytest'].ravel()
svc = svm.SVC()
# Fix: train on the TRAINING set. The original called svc.fit(Xval, yval),
# fitting on the test set it then scored against — X and y went unused and
# the reported score was meaningless.
svc.fit(X, y)
print(svc.score(Xval, yval))  # accuracy on the held-out test set
# Andrew Ng's sixth programming assignment (SVM exercise).
# (Scraped page footer removed — "latest recommended article published 2024-06-24 16:13:03".)