import numpy as np
import math
import random
class SVM:
    """Linear binary classifier trained with hinge-style per-sample updates.

    The decision function is ``sign(X @ W - b)``. A sample only triggers an
    update when it violates the margin ``y * (x @ W - b) >= 1``; the size of
    that update is scaled by a tanh-derivative factor read from a precomputed
    lookup table.

    Args:
        data: 2-D numeric array-like, one sample per row. All columns except
            the last are features; the last column is the class label
            (labels are compared by sign, so e.g. 1 / -1).
        train_ratio: Fraction of rows placed in the training split; the
            remaining rows form the validation split.

    Note:
        Constructing an instance immediately performs the random
        train/validation split (using the global ``random`` state) and
        builds the lookup table.
    """

    # Geometry of the tanh-derivative lookup table: entry ``i`` holds the
    # derivative at ``i / _TABLE_SCALE``, so the table covers [0, 2].
    _TABLE_SIZE = 201
    _TABLE_SCALE = 100

    def __init__(self, data, train_ratio=0.9):
        self.W = None  # weight vector, set by fit()
        self.b = None  # bias (subtracted from the linear score), set by fit()
        self.data = data
        self.train_X = None
        self.train_Y = None
        self.validation_X = None
        self.validation_Y = None
        self.derivative_list = []
        self.train_ratio = train_ratio
        self.TrainValidation()
        self.DerivativerListCreate()

    def Tanh(self, x):
        """Return the hyperbolic tangent of ``x`` (scalar or array).

        Uses ``np.tanh`` so that large ``|x|`` saturates to +/-1 instead of
        overflowing in an explicit ``e ** x`` computation.
        """
        return np.tanh(x)

    def TanhFirstDerivative(self, x):
        """Return ``d/dx tanh(x) = 1 - tanh(x) ** 2``."""
        return 1 - self.Tanh(x) ** 2

    def _margin_weight(self, x_i):
        """Look up the update scale factor for one sample.

        The table index is ``|x_i @ W - b - 1| * _TABLE_SCALE`` truncated to
        an int and clamped to the last table entry.
        """
        # NOTE(review): the distance does not involve the label; presumably
        # intentional in the original heuristic -- confirm.
        distance = abs(np.dot(x_i, self.W) - self.b - 1)
        last_index = len(self.derivative_list) - 1
        index = min(last_index, int(distance * self._TABLE_SCALE))
        return self.derivative_list[index]

    def fit(self, learning_rate=0.01, epochs=1000):
        """Train ``W`` and ``b`` on the training split, then print a summary.

        Args:
            learning_rate: Step size of each per-sample update.
            epochs: Number of full passes over the training split.

        Prints the learned ``W``, ``b`` and the validation accuracy.
        """
        num_features = self.train_X.shape[1]
        self.W = np.zeros(num_features)
        self.b = 0
        for _ in range(epochs):
            for x_i, y_i in zip(self.train_X, self.train_Y):
                # Samples already outside the margin leave the model untouched.
                if y_i * (np.dot(x_i, self.W) - self.b) >= 1:
                    continue
                # Compute the scale factor once, *before* touching W, so the
                # weight and bias steps for this sample use the same factor.
                weight = self._margin_weight(x_i)
                self.W += 2 * learning_rate * (x_i * y_i) * weight
                self.b -= 2 * learning_rate * y_i * weight
        print('W:', self.W)
        print('b:', self.b)
        print('accuracy: ', self.Accuracy())

    def TrainValidation(self):
        """Randomly split ``self.data`` into training and validation sets.

        ``int(rows * train_ratio)`` rows are drawn without replacement (in
        sampled order) for training; the remaining rows, in ascending row
        order, form the validation set. The last column becomes ``*_Y`` and
        the other columns become ``*_X``.
        """
        data = np.asarray(self.data, dtype=float)
        row_count, _ = data.shape
        train_size = int(row_count * self.train_ratio)
        train_rows = random.sample(list(range(row_count)), train_size)
        # Set membership keeps the complement computation linear.
        chosen = set(train_rows)
        validation_rows = [r for r in range(row_count) if r not in chosen]
        # Explicit int dtype so an empty selection still indexes as rows.
        train_set = data[np.array(train_rows, dtype=int)]
        validation_set = data[np.array(validation_rows, dtype=int)]
        self.train_X = train_set[:, 0:-1]
        self.train_Y = train_set[:, -1]
        self.validation_X = validation_set[:, 0:-1]
        self.validation_Y = validation_set[:, -1]

    def Predict(self, X):
        """Return ``sign(X @ W - b)`` for one sample or a 2-D batch."""
        linear_output = np.dot(X, self.W) - self.b
        return np.sign(linear_output)

    def Accuracy(self):
        """Return the fraction of validation rows classified correctly.

        Predictions are compared with the sign of the stored labels.
        Returns NaN when the validation split is empty, since the accuracy
        is undefined in that case.
        """
        total = self.validation_X.shape[0]
        if total == 0:
            return float("nan")
        predicted = self.Predict(self.validation_X)
        hits = np.count_nonzero(predicted == np.sign(self.validation_Y))
        return hits / total

    def DerivativerListCreate(self):
        """(Re)build the tanh-derivative lookup table in ``derivative_list``.

        The list is replaced rather than appended to, so calling this more
        than once does not grow the table.
        """
        self.derivative_list = [
            self.TanhFirstDerivative(i / self._TABLE_SCALE)
            for i in range(self._TABLE_SIZE)
        ]
注释
- 输入 data 为 NumPy 二维数组(矩阵),每行为一条样本;所有取值须为数值,每行最后一列为类别标签(如 1 和 -1)。