简单 SVM 二分类器底层代码的 Python 实现

import numpy as np
import math
import random

class SVM:
    """Linear binary classifier trained with per-sample gradient steps.

    The decision function is ``sign(X @ W - b)``.  On construction the input
    matrix is randomly split into a training part and a validation part, and
    a lookup table of the tanh first derivative is precomputed; ``fit`` uses
    that table to scale each update step.

    Args:
        data: 2-D numeric array.  Every row is one sample; the last column
            is read as the label and all preceding columns as features.
        train_ratio: Fraction of rows placed in the training split.
            ``int(rows * train_ratio)`` rows are drawn with
            ``random.sample``, which raises ``ValueError`` if that count is
            negative or larger than the number of rows.
    """

    # Geometry of the precomputed derivative table: entry i holds
    # tanh'(i * _TABLE_STEP) for i in [0, _TABLE_SIZE).
    _TABLE_SIZE = 201
    _TABLE_STEP = 0.01

    def __init__(self, data, train_ratio=0.9):
        self.W = None
        self.b = None
        self.data = data
        self.train_X = None
        self.train_Y = None
        self.validation_X = None
        self.validation_Y = None
        self.derivative_list = []
        self.train_ratio = train_ratio
        self.TrainValidation()
        self.DerivativerListCreate()

    def Tanh(self, x):
        """Return the hyperbolic tangent of ``x``.

        ``np.tanh`` is used instead of the explicit exponential formula so
        that large magnitudes saturate to +/-1 rather than overflowing.
        """
        return np.tanh(x)

    def TanhFirstDerivative(self, x):
        """Return the first derivative of tanh at ``x``: ``1 - tanh(x)**2``."""
        return 1 - self.Tanh(x) ** 2

    def _derivative_at(self, x_i):
        """Look up the step-scaling factor for sample ``x_i``.

        The table index is ``int(|x_i . W - b - 1| * 100)``, clamped to the
        last table entry.  The current values of ``self.W`` and ``self.b``
        are read at call time.
        """
        offset = abs(np.dot(x_i, self.W) - self.b - 1)
        return self.derivative_list[min(self._TABLE_SIZE - 1, int(offset * 100))]

    # Training routine
    def fit(self, learning_rate=0.01, epochs=1000):
        """Train ``W`` and ``b`` on the training split, then print a report.

        Weights start at zero.  For every sample whose margin
        ``y * (x . W - b)`` is below 1, ``W`` and ``b`` are each moved by a
        step scaled with the precomputed tanh-derivative table.

        Args:
            learning_rate: Base step size (each step uses twice this value).
            epochs: Number of full passes over the training split.
        """
        _, num_features = self.train_X.shape
        self.W = np.zeros(num_features)
        self.b = 0

        for _ in range(epochs):
            for x_i, y_i in zip(self.train_X, self.train_Y):
                if y_i * (np.dot(x_i, self.W) - self.b) >= 1:
                    # Margin already satisfied: no update for this sample.
                    continue
                self.W -= 2 * learning_rate * \
                    (-np.dot(x_i, y_i)) * self._derivative_at(x_i)
                # NOTE: the table lookup for the bias step deliberately runs
                # after W has been updated, matching the established
                # behaviour of this trainer.
                self.b -= 2 * learning_rate * y_i * self._derivative_at(x_i)

        print('W:', self.W)
        print('b:', self.b)
        print('accuracy:    ', self.Accuracy())

    # Train / validation split
    def TrainValidation(self):
        """Randomly split ``self.data`` into training and validation parts.

        Training rows keep the order produced by ``random.sample``;
        validation rows are the remaining rows in ascending index order.
        Results are stored in ``train_X``/``train_Y`` and
        ``validation_X``/``validation_Y``.
        """
        row, col = self.data.shape
        numbers = list(range(row))
        train_size = int(row * self.train_ratio)
        train_list = random.sample(numbers, train_size)
        # Set membership keeps the complement computation linear.
        chosen = set(train_list)
        validation_list = [num for num in numbers if num not in chosen]

        # Stacking onto an empty float array keeps the (0, col) shape for an
        # empty split and promotes integer input to float in one gather.
        empty = np.empty((0, col))
        train_set = np.vstack(
            (empty, self.data[np.asarray(train_list, dtype=np.intp)]))
        validation_set = np.vstack(
            (empty, self.data[np.asarray(validation_list, dtype=np.intp)]))

        # Features are every column but the last; the label is the last.
        self.train_X = train_set[:, 0:-1]
        self.train_Y = train_set[:, -1]
        self.validation_X = validation_set[:, 0:-1]
        self.validation_Y = validation_set[:, -1]

    # Prediction
    def Predict(self, X):
        """Return ``sign(X . W - b)`` for one sample or a matrix of samples."""
        linear_output = np.dot(X, self.W) - self.b
        return np.sign(linear_output)

    # Accuracy on the validation split
    def Accuracy(self):
        """Return the fraction of validation rows that are predicted correctly.

        A prediction counts as correct when it equals the sign of the stored
        label.  Returns NaN when the validation split is empty, because the
        accuracy is undefined in that case.
        """
        total = self.validation_X.shape[0]
        if total == 0:
            return float("nan")
        predicted = self.Predict(self.validation_X)
        expected = np.sign(self.validation_Y)
        return int(np.count_nonzero(predicted == expected)) / total

    # Precompute the tanh first-derivative table
    def DerivativerListCreate(self):
        """(Re)build ``derivative_list`` with tanh'(0.00), tanh'(0.01), ...

        The list is rebuilt from scratch, so calling this method repeatedly
        does not grow the table.
        """
        self.derivative_list = [
            self.TanhFirstDerivative(self._TABLE_STEP * i)
            for i in range(self._TABLE_SIZE)
        ]

注释

  • 输入 data 为 NumPy 二维数组,每行为一条样本,所有取值必须为数值;每行最后一列为类别标签(取值为 1 或 -1),其余各列为特征
  • 0
    点赞
  • 8
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值