Lab Report: Algorithms for the Soft-Margin Support Vector Machine
Experiment name: Algorithms for the soft-margin support vector machine
1. Problem Description
The implementation of the hinge-loss soft-margin support vector machine in Section 6.4.3 uses subgradient descent. Modify that algorithm to implement the stochastic subgradient descent and mini-batch subgradient descent variants of the soft-margin SVM.
2. Experimental Objective
Study support vector machine algorithms, in particular stochastic and mini-batch subgradient descent for the hinge-loss soft-margin SVM.
3. Experimental Content
Data import
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
Data preprocessing
iris = datasets.load_iris()
X = iris["data"][:, (2, 3)]  # petal length and petal width
# Label Iris-Virginica as +1 and the other two classes as -1.
y = 2 * (iris["target"] == 2).astype(np.int32).reshape(-1, 1) - 1
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=5)
Algorithm description
Stochastic subgradient descent:
Gradient descent computes the subgradient of the empirical loss on all m training examples and averages it at every iteration, which is expensive.
Stochastic subgradient descent instead samples a single training example per iteration to estimate the subgradient of the objective. It suits large training sets and sharply reduces the cost of each iteration. The objective and the resulting estimate are written out below.
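A minimal derivation, assuming the 1/(2Cm) regularization weighting implied by the gradient expressions in the code. The objective being minimized is

J(w, b) = \frac{1}{2Cm} \lVert w \rVert^2 + \frac{1}{m} \sum_{i=1}^{m} \max\bigl(0,\; 1 - y_i (w^{\top} x_i + b)\bigr)

Writing e_i = 1 when y_i (w^{\top} x_i + b) < 1 and e_i = 0 otherwise, a full-batch subgradient is

g_w = \frac{w}{Cm} - \frac{1}{m} \sum_{i=1}^{m} e_i y_i x_i, \qquad g_b = -\frac{1}{m} \sum_{i=1}^{m} e_i y_i

Stochastic subgradient descent replaces the average over all m examples with a single uniformly sampled example i, giving the unbiased estimates g_w ≈ w/(Cm) - e_i y_i x_i and g_b ≈ -e_i y_i; these are the quantities the class below computes.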
class LinearRegressionSGD:
    def __init__(self, C=1000):
        self.C = C

    def fit(self, X, y, eta=0.01, N=5000):
        m, n = X.shape
        w, b = np.zeros((n, 1)), 0
        for r in range(N):
            # Sample one training example uniformly at random.
            i = np.random.randint(m)
            x = X[i].reshape(1, -1)         # (1, n) row vector
            yi = y[i].reshape(1, 1)         # label in {-1, +1}
            s = (x.dot(w) + b) * yi         # margin of the sampled example
            e = (s < 1).astype(np.float64)  # 1 if the hinge loss is active
            # Single-sample estimate of the subgradient.
            g_w = 1 / (self.C * m) * w - x.T.dot(yi * e)
            g_b = -(yi * e).item()
            w = w - eta * g_w
            b = b - eta * g_b
        self.w = w
        self.b = b

    def predict(self, X):
        return np.sign(X.dot(self.w) + self.b)
Mini-batch subgradient descent:
In this experiment the batch size B is 90: each iteration draws B training examples at random (with replacement) and updates the parameters along the negative subgradient of the empirical loss on those B examples; the resulting estimate is given below. Its per-iteration cost is lower than that of full gradient descent and its updates are more stable than those of single-sample stochastic descent, combining the strengths of both. (Note that with test_size=0.4 the training set itself contains 90 examples, so each batch here is a bootstrap resample of the whole training set.)
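Under the same objective J(w, b) as above, for a mini-batch S of size B the hinge term is averaged over S instead of over all m examples:

g_w = \frac{w}{Cm} - \frac{1}{B} \sum_{i \in S} e_i y_i x_i, \qquad g_b = -\frac{1}{B} \sum_{i \in S} e_i y_i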
class LinearRegressionMinSGD:
    def __init__(self, C=1000):
        self.C = C

    def fit(self, X, y, eta=0.01, N=5000, B=90):
        m, n = X.shape
        w, b = np.zeros((n, 1)), 0
        for r in range(N):
            # Draw a mini-batch of B examples (sampled with replacement).
            batch = np.random.randint(low=0, high=m, size=B)
            X_batch = X[batch]                  # (B, n)
            y_batch = y[batch].reshape(B, 1)    # labels in {-1, +1}
            s = (X_batch.dot(w) + b) * y_batch  # margins of the batch
            e = (s < 1).astype(np.float64)      # 1 where the hinge is active
            # Mini-batch estimate of the subgradient.
            g_w = 1 / (self.C * m) * w - 1 / B * X_batch.T.dot(y_batch * e)
            g_b = -1 / B * (y_batch * e).sum()
            w = w - eta * g_w
            b = b - eta * g_b
        self.w = w
        self.b = b

    def predict(self, X):
        return np.sign(X.dot(self.w) + self.b)
Main code
Stochastic subgradient descent:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
class LinearRegressionSGD:
    def __init__(self, C=1000):
        self.C = C

    def fit(self, X, y, eta=0.01, N=5000):
        m, n = X.shape
        w, b = np.zeros((n, 1)), 0
        for r in range(N):
            # Sample one training example uniformly at random.
            i = np.random.randint(m)
            x = X[i].reshape(1, -1)         # (1, n) row vector
            yi = y[i].reshape(1, 1)         # label in {-1, +1}
            s = (x.dot(w) + b) * yi         # margin of the sampled example
            e = (s < 1).astype(np.float64)  # 1 if the hinge loss is active
            # Single-sample estimate of the subgradient.
            g_w = 1 / (self.C * m) * w - x.T.dot(yi * e)
            g_b = -(yi * e).item()
            w = w - eta * g_w
            b = b - eta * g_b
        self.w = w
        self.b = b

    def predict(self, X):
        return np.sign(X.dot(self.w) + self.b)
iris = datasets.load_iris()
X = iris["data"][:, (2, 3)]
y = 2 * (iris["target"]==2).astype(np.int32).reshape(-1,1) - 1
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=5)
model = LinearRegressionSGD()
model.fit(X_train, y_train, N=10000)
y_pred = model.predict(X_test)
print(model.w)
accuracy = accuracy_score(y_test, y_pred)
print("accuracy= {}".format(accuracy))
plt.figure(12)
plt.axis([3,7,0.5,3])
plt.plot(X_train[:, 0][y_train[:,0]==1] , X_train[:, 1][y_train[:,0]==1], "bs", ms=3)
plt.plot(X_train[:, 0][y_train[:,0]==-1], X_train[:, 1][y_train[:,0]==-1], "yo", ms=3)
x0 = np.linspace(3, 7, 200)
w = model.w
b = model.b
line = -w[0]/w[1] * x0 - b/w[1]
plt.plot(x0, line, color='black')
plt.show()
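The fit above uses a constant step size eta. Subgradient methods are often run with a decaying step size instead; the sketch below illustrates that idea (the schedule eta0/(1 + decay*r) and the parameter names are assumptions, not part of the original experiment):
# Hypothetical variant: same single-sample update as fit(), but with a
# step size that shrinks as the iteration count r grows.
def fit_decaying(self, X, y, eta0=0.1, decay=1e-3, N=5000):
    m, n = X.shape
    w, b = np.zeros((n, 1)), 0
    for r in range(N):
        eta = eta0 / (1 + decay * r)  # decaying step size
        i = np.random.randint(m)
        x = X[i].reshape(1, -1)
        yi = y[i].reshape(1, 1)
        e = ((x.dot(w) + b) * yi < 1).astype(np.float64)
        w = w - eta * (1 / (self.C * m) * w - x.T.dot(yi * e))
        b = b + eta * (yi * e).item()
    self.w, self.b = w, b

LinearRegressionSGD.fit_decaying = fit_decaying  # attach for experimentation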
Mini-batch subgradient descent:
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
class LinearRegressionMinSGD:
    def __init__(self, C=1000):
        self.C = C

    def fit(self, X, y, eta=0.01, N=5000, B=90):
        m, n = X.shape
        w, b = np.zeros((n, 1)), 0
        for r in range(N):
            # Draw a mini-batch of B examples (sampled with replacement).
            batch = np.random.randint(low=0, high=m, size=B)
            X_batch = X[batch]                  # (B, n)
            y_batch = y[batch].reshape(B, 1)    # labels in {-1, +1}
            s = (X_batch.dot(w) + b) * y_batch  # margins of the batch
            e = (s < 1).astype(np.float64)      # 1 where the hinge is active
            # Mini-batch estimate of the subgradient.
            g_w = 1 / (self.C * m) * w - 1 / B * X_batch.T.dot(y_batch * e)
            g_b = -1 / B * (y_batch * e).sum()
            w = w - eta * g_w
            b = b - eta * g_b
        self.w = w
        self.b = b

    def predict(self, X):
        return np.sign(X.dot(self.w) + self.b)
iris = datasets.load_iris()
X = iris["data"][:, (2, 3)]
y = 2 * (iris["target"]==2).astype(np.int32).reshape(-1,1) - 1
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=5)
model = LinearRegressionMinSGD()
model.fit(X_train, y_train, N=10000)
y_pred = model.predict(X_test)
print(model.w)
accuracy = accuracy_score(y_test, y_pred)
print("accuracy= {}".format(accuracy))
plt.figure(12)
plt.axis([3,7,0.5,3])
plt.plot(X_train[:, 0][y_train[:,0]==1] , X_train[:, 1][y_train[:,0]==1], "bs", ms=3)
plt.plot(X_train[:, 0][y_train[:,0]==-1], X_train[:, 1][y_train[:,0]==-1], "yo", ms=3)
x0 = np.linspace(3, 7, 200)
w = model.w
b = model.b
line = -w[0]/w[1] * x0 - b/w[1]
plt.plot(x0, line, color='black')
plt.show()
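As a sanity check (not part of the original report), the learned boundary can be compared with scikit-learn's built-in linear SVM trained with hinge loss on the same split. Note that LinearSVC parameterizes regularization differently from the C used above, so the two C values are not directly comparable:
from sklearn.svm import LinearSVC

# Reference linear SVM with hinge loss; LinearSVC expects 1-D labels.
ref = LinearSVC(C=1000, loss="hinge", max_iter=100000)
ref.fit(X_train, y_train.ravel())
print("LinearSVC accuracy = {}".format(ref.score(X_test, y_test.ravel())))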
4. Results and Analysis
Stochastic subgradient descent: each iteration touches only one sampled example, so individual updates are very cheap but noisy.
Mini-batch subgradient descent: its per-iteration cost is lower than that of full gradient descent and its updates are more stable than those of single-sample stochastic descent, combining the strengths of both. A timing sketch for verifying this empirically follows.
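A minimal sketch for timing both fits on the same data (the measured numbers will vary by machine and random seed):
import time

for name, model in [("stochastic", LinearRegressionSGD()),
                    ("mini-batch", LinearRegressionMinSGD())]:
    start = time.perf_counter()
    model.fit(X_train, y_train, N=10000)
    elapsed = time.perf_counter() - start
    acc = accuracy_score(y_test, model.predict(X_test))
    print("{}: {:.3f}s, accuracy = {}".format(name, elapsed, acc))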