SVDD算法的相关学习笔记参考这篇博客: https://blog.csdn.net/sterben25/article/details/79106351
在此之前,我写一个关于使用SVDD算法进行鸢尾花分类的代码和博客,然而该方法并没有使用核函数,来体现支持向量方法的优越性,因此,本博文用于记录算法使用核函数后,并对月牙数据进行一个支持向量边界求取。
可视化结果如下图所示:
月牙数据如下所示:
from sklearn import datasets
X,y = datasets.make_moons(n_samples = 100, noise = 0.2, random_state = 42)
y = 2*y - 1 # Rescale labels to be {-1,1}
可引入不同核函数的SVDD算法的部分代码如下:
import numpy as np
import quadprog
class SVDD:
def __init__(self, C = 0.01, gamma = 3.):
"""
Initialize SVDD with Gaussian kernel function
Parameters
----------
C : scalar
SVDD allowable error constraint bound
gamma : scalar
Kernel function parameter
"""
self.C_ = C
self.gamma_ = gamma
def fit(self, X, y, sample_weight = None):
"""
Fit the model according to the given training data.
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Training vector, where n_samples in the number of samples and
n_features is the number of features.
y : array-like, shape = [n_samples]
Target vector relative to X
sample_weight : array-like, shape = [n_samples], optional
Array of weights that are assigned to individual
samples. If not provided,
then each sample is given unit weight.
Returns
-------
self : object
Returns self.
"""
# Find coefficients and support vectors
P, q, G, h, A, b = self._build_qp(X, y)
beta = y*self._quadprog_solve_qp(self._nearestPD(P), q, G, h, A, b)
self.support_vectors_ = X[abs(beta) > 1e-6,:]
self.support_ = np.squeeze(np.where(abs(beta) > 1e-6))
self.dual_coef_ = beta[abs(beta) > 1e-6]
# Find decision threshold
R2, _ = self._radius(self.support_vectors_)
self.threshold_ = np.mean(R2)
return self
def predict(self, X):
"""
Predict data classification
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Test vector, where n_samples in the number of samples and
n_features is the number of features.
Returns
-------
y : array-like, shape = [n_samples]
Target vector relative to X
"""
y = np.sign(self.decision_function(X)).astype('int')
y[y == 0] = 1 # If data is on the threshold boundary, include it in the SVDD
return y
def decision_function(self, X):
"""
Test point radial difference from decision threshold. Negative
difference means test point is outside SVDD.
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Data vector, where n_samples in the number of samples and
n_features is the number of features.
Returns
-------
scalar shape = [n_samples]
Radial difference from decision threshold
"""
radius, _ = self._radius(X)
return self.threshold_ - radius
def _radius(self, X):
"""
Test data hypersphere radii
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Data vector, where n_samples in the number of samples and
n_features is the number of features.
Returns
-------
R2 : array-like, shape = [n_samples]
Hypersphere radius for each sample
dR2 : array-like, shape = [n_samples]
Hypersphere derivative evaluated at each sample
"""
kap = 0
lam = 0
mu = 0
for i in range(len(self.dual_coef_)):
Kxz, dKxz = self._rbf_kernel(self.support_vectors_[i,:], X)
kap = kap + self.dual_coef_[i]*Kxz
mu = mu + self.dual_coef_[i]*dKxz
for j in range(len(self.dual_coef_)):
Kxx, _ = self._rbf_kernel(self.support_vectors_[i,:], self.support_vectors_[j,:])
lam = lam + self.dual_coef_[i]*self.dual_coef_[j]*Kxx
R2 = 1 - 2*kap + lam
dR2 = -2*mu
return R2, dR2
def _rbf_kernel(self, x, z):
"""
Gaussian (Radial Bias Function) kernel function
Parameters
----------
x : array-like, shape = [n_features]
Data point vector
z : array-like, shape = [n_samples, n_features]
Data vector, where n_samples in the number of samples and
n_features is the number of features.
Returns
-------
K : array-like, shape = [n_samples]
Kernel for each sample
dK2 : array-like, shape = [n_samples]
Kernel derivative evaluated at each sample
"""
if z.ndim > 1:
K = np.exp(-self.gamma_*np.linalg.norm(x - z, axis = 1)**2)
#dK = 2*self.gamma*(x - z)*np.exp(-self.gamma*np.linalg.norm(x - z, axis = 1)**2)
dK = np.dot(np.diag(np.exp(-self.gamma_*np.linalg.norm(x - z, axis = 1)**2)), 2*self.gamma_*(x - z))
else:
K = np.exp(-self.gamma_*np.linalg.norm(x - z)**2)
dK = 2*self.gamma_*(x - z)*np.exp(-self.gamma_*np.linalg.norm(x - z)**2)
return K, dK
def _build_qp(self, X, y):
"""
Construct quadratic programming elements
Parameters
----------
X : array-like, shape = [n_samples, n_features]
Data vector, where n_samples in the number of samples and
n_features is the number of features.
y : array-like, shape = [n_samples]
Target vector relative to X
Returns
-------
P : array-like, shape = [n_samples, n_samples]
q : array-like, shape = [n_samples]
G : array-like, shape = [n_constraints, n_samples]
h : array-like, shape = [n_constraints]
A : array-like, shape = [n_constraints, n_samples]
b : array-like, shape = [n_constraints]
"""
# Build P
P = np.eye(len(y))
for i in range(len(y) - 1):
for j in range(i + 1, len(y)):
Kxx, _ = self._rbf_kernel(X[i,:],X[j,:])
P[i,j] = y[i]*y[j]*Kxx
P = self._nearestPD(P + P.T - np.eye(len(y)))
# Build q
q = np.zeros(len(y))
# Build G
G = np.vstack((-np.eye(len(y)),np.eye(len(y))))
# Build h
h = np.hstack((np.zeros(len(y)),self.C_*np.ones(len(y))))
# Build A
A = y
# Build b
b = 1.
return P, q, G, h, A, b
SVDD算法可以通过对于训练数据进行学习,并对测试机数据预测。执行代码如下:
# Create variable mesh
num = 50
x1 = np.linspace(-2., 3., num = num)
x2 = np.linspace(-2., 2., num = num)
X1, X2 = np.meshgrid(x1, x2)
inp = np.hstack((X1.reshape((num*num,1)), X2.reshape((num*num,1))))
# Get decision threshold
R2 = clf.decision_function(inp)
# Plot decision threshold contour
C = R2.reshape((num,num))
plt.contour(X1, X2, C, levels = [0], colors = 'black')
# Plot data points
plt.plot(X[y==1,0],X[y==1,1],'r.', label = 'Target')
plt.plot(X[y==-1,0],X[y==-1,1],'b.', label = 'Outlier')
# Plot support vectors
plt.plot(clf.support_vectors_[:,0],clf.support_vectors_[:,1],'ko', label = 'Support Vector', mfc='none')
plt.legend()
plt.show()
svdd算法处理的数据特点与svm算法类似,本质上两个算法的差异较小。