TSVM Implementation

Code available at https://github.com/horcham/TSVM

# coding:utf-8
import numpy as np
import sklearn.svm as svm
import joblib  # sklearn.externals.joblib was removed in newer scikit-learn versions
from sklearn.model_selection import train_test_split

class TSVM(object):
    def __init__(self):
        pass

    def initial(self, kernel='linear'):
        '''
        Initialize TSVM
        Parameters
        ----------
        kernel: kernel of the underlying svm.SVC (default 'linear')
        '''
        self.Cl, self.Cu = 1.5, 0.001   # penalties for labeled / unlabeled samples; Cu is annealed up towards Cl
        self.kernel = kernel
        self.clf = svm.SVC(C=1.5, kernel=self.kernel)

    def load(self, model_path='./TSVM.model'):
        '''
        Load TSVM from model_path
        Parameters
        ----------
        model_path: model path of TSVM
                        model should be an sklearn SVM saved with joblib
        '''
        self.clf = joblib.load(model_path)

    def train(self, X1, Y1, X2):
        '''
        Train TSVM by X1, Y1, X2
        Parameters
        ----------
        X1: input data with labels
                np.array, shape: [n1, m], n1: number of labeled samples, m: number of features
        Y1: labels of X1, must be -1 or +1 (binary classification only)
                np.array, shape: [n1, ]
        X2: input data without labels
                np.array, shape: [n2, m], n2: number of unlabeled samples, m: number of features
        '''
        N = len(X1) + len(X2)
        sample_weight = np.ones(N)
        sample_weight[len(X1):] = self.Cu   # down-weight the unlabeled samples at first

        # Step 1: train an initial SVM on the labeled data only, then pseudo-label the rest
        self.clf.fit(X1, Y1)
        Y2 = self.clf.predict(X2)           # pseudo-labels for the unlabeled data
        X2_id = np.arange(len(X2))
        X3 = np.vstack([X1, X2])
        Y3 = np.concatenate([Y1, Y2])       # keep labels 1-D; vstack on a 1-D Y1 would fail

        while self.Cu < self.Cl:
            self.clf.fit(X3, Y3, sample_weight=sample_weight)
            while True:
                Y2_d = self.clf.decision_function(X2)   # decision value; for a linear kernel this is w^T x + b
                epsilon = 1 - Y2 * Y2_d                  # slack of the functional margin for each unlabeled sample
                positive_set, positive_id = epsilon[Y2 > 0], X2_id[Y2 > 0]
                negative_set, negative_id = epsilon[Y2 < 0], X2_id[Y2 < 0]
                if len(positive_set) == 0 or len(negative_set) == 0:
                    break   # no pair of oppositely pseudo-labeled samples left to swap
                positive_max_id = positive_id[np.argmax(positive_set)]
                negative_max_id = negative_id[np.argmax(negative_set)]
                a, b = epsilon[positive_max_id], epsilon[negative_max_id]
                if a > 0 and b > 0 and a + b > 2.0:
                    # swap the two most-violating pseudo-labels and refit
                    Y2[positive_max_id] = Y2[positive_max_id] * -1
                    Y2[negative_max_id] = Y2[negative_max_id] * -1
                    Y3 = np.concatenate([Y1, Y2])
                    self.clf.fit(X3, Y3, sample_weight=sample_weight)
                else:
                    break
            self.Cu = min(2 * self.Cu, self.Cl)   # anneal Cu towards Cl
            sample_weight[len(X1):] = self.Cu

    def score(self, X, Y):
        '''
        Calculate accuracy of TSVM by X, Y
        Parameters
        ----------
        X: input data
                np.array, shape: [n, m], n: number of samples, m: number of features
        Y: labels of X
                np.array, shape: [n, ]
        Returns
        -------
        Accuracy of TSVM
                float
        '''
        return self.clf.score(X, Y)

    def predict(self, X):
        '''
        Feed X and predict Y by TSVM
        Parameters
        ----------
        X: input data
                np.array, shape: [n, m], n: number of samples, m: number of features
        Returns
        -------
        labels of X
                np.array, shape: [n, ]
        '''
        return self.clf.predict(X)

    def save(self, path='./TSVM.model'):
        '''
        Save TSVM to path
        Parameters
        ----------
        path: path to save the TSVM model (dumped with joblib)
        '''
        joblib.dump(self.clf, path)

if __name__ == '__main__':
    # Small synthetic example: X1/Y1 labeled (labels in {-1, +1}), X2 unlabeled
    from sklearn.datasets import make_classification
    X, Y = make_classification(n_samples=200, n_features=10, random_state=0)
    Y = np.where(Y == 0, -1, 1)                 # map {0, 1} labels to {-1, +1}
    X1, X2, Y1, _ = train_test_split(X, Y, test_size=0.5, random_state=0)
    model = TSVM()
    model.initial()
    model.train(X1, Y1, X2)
    Y_hat = model.predict(X)
    accuracy = model.score(X, Y)

TSVM

An implementation of the TSVM algorithm, following Section 13.3 of Machine Learning (Zhou Zhihua).
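
For reference, the label-swapping procedure in train approximately optimizes the standard TSVM objective described in Section 13.3. It is restated here from memory, so treat it as a sketch rather than a verbatim quote of the book: with $l$ labeled samples, $u$ unlabeled samples ($m = l + u$), pseudo-labels $\hat{y}_i$ for the unlabeled part, and slack variables $\xi_i$,

$$
\min_{\boldsymbol{w},\, b,\, \hat{\boldsymbol{y}},\, \boldsymbol{\xi}}\;
\frac{1}{2}\lVert \boldsymbol{w} \rVert_2^2
+ C_l \sum_{i=1}^{l} \xi_i
+ C_u \sum_{i=l+1}^{m} \xi_i
$$

subject to $y_i(\boldsymbol{w}^{\mathrm{T}} \boldsymbol{x}_i + b) \ge 1 - \xi_i$ for the labeled samples, $\hat{y}_i(\boldsymbol{w}^{\mathrm{T}} \boldsymbol{x}_i + b) \ge 1 - \xi_i$ for the unlabeled samples, and $\xi_i \ge 0$. In the code, Cl and Cu are these two penalties; train starts with a small Cu, swaps pairs of oppositely pseudo-labeled samples whose slacks satisfy $\xi_i > 0$, $\xi_j > 0$, $\xi_i + \xi_j > 2$, refits, and then doubles Cu until it reaches Cl.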

Usage

  • Build a TSVM

    model = TSVM()
    
  • Initialize the TSVM

    model.initial(kernel = 'linear')
    

    kernel is the kernel used by the underlying SVM; the default is 'linear'.

    To load an existing model instead:

    model.load(model_path)
    

    model_path is the path where the TSVM model is stored.

  • Train the TSVM

    model.train(X1, Y1, X2)
    

    Here, X1 is the labeled data and Y1 contains its labels; X2 is the unlabeled data. X1 and X2 are numpy.array with shape [n, m],
    and Y1 is a numpy.array with shape [n, ], where n is the number of samples and m is the number of features. This project only supports binary classification, and the labels must be -1 and 1. (Thanks to a reader for pointing this out.) See the end-to-end sketch after this list.

  • Predict with the TSVM

    Y_hat = model.predict(X)
    

    Y_hat is a numpy.array with shape [n, ].

  • Compute the accuracy of the TSVM

    accuracy = model.score(X, Y)
    
  • Save the model

    model.save(model_path)
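
The snippet below strings these steps together on synthetic data. It is a minimal sketch rather than part of the original project: make_classification, the 50/50 labeled/unlabeled split, and the file name ./tsvm_demo.model are illustrative choices only.

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split

    # Binary toy data; TSVM expects labels in {-1, +1}
    X, Y = make_classification(n_samples=200, n_features=10, random_state=0)
    Y = np.where(Y == 0, -1, 1)

    # Treat half of the data as labeled (X1, Y1) and the rest as unlabeled (X2)
    X1, X2, Y1, Y2_true = train_test_split(X, Y, test_size=0.5, random_state=0)

    model = TSVM()
    model.initial(kernel='linear')
    model.train(X1, Y1, X2)

    # Score on the unlabeled half, whose true labels were held back
    accuracy = model.score(X2, Y2_true)

    # Save, then reload into a fresh instance
    model.save('./tsvm_demo.model')
    model2 = TSVM()
    model2.load('./tsvm_demo.model')
    Y_hat = model2.predict(X2)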
    