TSVM Implementation

Code available at https://github.com/horcham/TSVM

# coding:utf-8
import numpy as np
import sklearn.svm as svm
import joblib  # sklearn.externals.joblib was removed in newer scikit-learn versions
from sklearn.model_selection import train_test_split

class TSVM(object):
    def __init__(self):
        pass

    def initial(self, kernel='linear'):
        '''
        Initialize TSVM
        Parameters
        ----------
        kernel: kernel of the underlying svm.SVC (default 'linear')
        '''
        self.Cl, self.Cu = 1.5, 0.001   # penalties for labeled / unlabeled samples; Cu is annealed up towards Cl
        self.kernel = kernel
        self.clf = svm.SVC(C=1.5, kernel=self.kernel)

    def load(self, model_path='./TSVM.model'):
        '''
        Load TSVM from model_path
        Parameters
        ----------
        model_path: model path of TSVM
                        model should be an sklearn SVM saved with joblib
        '''
        self.clf = joblib.load(model_path)

    def train(self, X1, Y1, X2):
        '''
        Train TSVM by X1, Y1, X2
        Parameters
        ----------
        X1: input data with labels
                np.array, shape: [n1, m], n1: number of labeled samples, m: number of features
        Y1: labels of X1, must be -1 or +1 (binary classification only)
                np.array, shape: [n1, ]
        X2: input data without labels
                np.array, shape: [n2, m], n2: number of unlabeled samples, m: number of features
        '''
        N = len(X1) + len(X2)
        sample_weight = np.ones(N)
        sample_weight[len(X1):] = self.Cu   # down-weight the unlabeled samples at first

        # Step 1: train an initial SVM on the labeled data only, then pseudo-label the rest
        self.clf.fit(X1, Y1)
        Y2 = self.clf.predict(X2)           # pseudo-labels for the unlabeled data
        X2_id = np.arange(len(X2))
        X3 = np.vstack([X1, X2])
        Y3 = np.concatenate([Y1, Y2])       # keep labels 1-D; vstack on a 1-D Y1 would fail

        while self.Cu < self.Cl:
            self.clf.fit(X3, Y3, sample_weight=sample_weight)
            while True:
                Y2_d = self.clf.decision_function(X2)   # decision value; for a linear kernel this is w^T x + b
                epsilon = 1 - Y2 * Y2_d                  # slack of the functional margin for each unlabeled sample
                positive_set, positive_id = epsilon[Y2 > 0], X2_id[Y2 > 0]
                negative_set, negative_id = epsilon[Y2 < 0], X2_id[Y2 < 0]
                if len(positive_set) == 0 or len(negative_set) == 0:
                    break   # no pair of oppositely pseudo-labeled samples left to swap
                positive_max_id = positive_id[np.argmax(positive_set)]
                negative_max_id = negative_id[np.argmax(negative_set)]
                a, b = epsilon[positive_max_id], epsilon[negative_max_id]
                if a > 0 and b > 0 and a + b > 2.0:
                    # swap the two most-violating pseudo-labels and refit
                    Y2[positive_max_id] = Y2[positive_max_id] * -1
                    Y2[negative_max_id] = Y2[negative_max_id] * -1
                    Y3 = np.concatenate([Y1, Y2])
                    self.clf.fit(X3, Y3, sample_weight=sample_weight)
                else:
                    break
            self.Cu = min(2 * self.Cu, self.Cl)   # anneal Cu towards Cl
            sample_weight[len(X1):] = self.Cu

    def score(self, X, Y):
        '''
        Calculate accuracy of TSVM by X, Y
        Parameters
        ----------
        X: input data
                np.array, shape: [n, m], n: number of samples, m: number of features
        Y: labels of X
                np.array, shape: [n, ]
        Returns
        -------
        Accuracy of TSVM
                float
        '''
        return self.clf.score(X, Y)

    def predict(self, X):
        '''
        Feed X and predict Y by TSVM
        Parameters
        ----------
        X: input data
                np.array, shape: [n, m], n: number of samples, m: number of features
        Returns
        -------
        labels of X
                np.array, shape: [n, ]
        '''
        return self.clf.predict(X)

    def save(self, path='./TSVM.model'):
        '''
        Save TSVM to path
        Parameters
        ----------
        path: path to save the TSVM model (dumped with joblib)
        '''
        joblib.dump(self.clf, path)

if __name__ == '__main__':
    # Small synthetic example: X1/Y1 labeled (labels in {-1, +1}), X2 unlabeled
    from sklearn.datasets import make_classification
    X, Y = make_classification(n_samples=200, n_features=10, random_state=0)
    Y = np.where(Y == 0, -1, 1)                 # map {0, 1} labels to {-1, +1}
    X1, X2, Y1, _ = train_test_split(X, Y, test_size=0.5, random_state=0)
    model = TSVM()
    model.initial()
    model.train(X1, Y1, X2)
    Y_hat = model.predict(X)
    accuracy = model.score(X, Y)

TSVM

An implementation of the TSVM algorithm, following Section 13.3 of Machine Learning (Zhou Zhihua).
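
For reference, the label-swapping procedure in train approximately optimizes the standard TSVM objective described in Section 13.3. It is restated here from memory, so treat it as a sketch rather than a verbatim quote of the book: with $l$ labeled samples, $u$ unlabeled samples ($m = l + u$), pseudo-labels $\hat{y}_i$ for the unlabeled part, and slack variables $\xi_i$,

$$
\min_{\boldsymbol{w},\, b,\, \hat{\boldsymbol{y}},\, \boldsymbol{\xi}}\;
\frac{1}{2}\lVert \boldsymbol{w} \rVert_2^2
+ C_l \sum_{i=1}^{l} \xi_i
+ C_u \sum_{i=l+1}^{m} \xi_i
$$

subject to $y_i(\boldsymbol{w}^{\mathrm{T}} \boldsymbol{x}_i + b) \ge 1 - \xi_i$ for the labeled samples, $\hat{y}_i(\boldsymbol{w}^{\mathrm{T}} \boldsymbol{x}_i + b) \ge 1 - \xi_i$ for the unlabeled samples, and $\xi_i \ge 0$. In the code, Cl and Cu are these two penalties; train starts with a small Cu, swaps pairs of oppositely pseudo-labeled samples whose slacks satisfy $\xi_i > 0$, $\xi_j > 0$, $\xi_i + \xi_j > 2$, refits, and then doubles Cu until it reaches Cl.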

Usage

  • Build a TSVM

    model = TSVM()
    
  • Initialize the TSVM

    model.initial(kernel = 'linear')
    

    kernel is the kernel used by the underlying SVM; the default is 'linear'.

    To load an existing model instead:

    model.load(model_path)
    

    model_path is the path where the TSVM model is stored.

  • Train the TSVM

    model.train(X1, Y1, X2)
    

    Here, X1 is the labeled data and Y1 contains its labels; X2 is the unlabeled data. X1 and X2 are numpy.array with shape [n, m],
    and Y1 is a numpy.array with shape [n, ], where n is the number of samples and m is the number of features. This project only supports binary classification, and the labels must be -1 and 1. (Thanks to a reader for pointing this out.) See the end-to-end sketch after this list.

  • Predict with the TSVM

    Y_hat = model.predict(X)
    

    Y_hat is a numpy.array with shape [n, ].

  • Compute the accuracy of the TSVM

    accuracy = model.score(X, Y)
    
  • Save the model

    model.save(model_path)
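
The snippet below strings these steps together on synthetic data. It is a minimal sketch rather than part of the original project: make_classification, the 50/50 labeled/unlabeled split, and the file name ./tsvm_demo.model are illustrative choices only.

    import numpy as np
    from sklearn.datasets import make_classification
    from sklearn.model_selection import train_test_split

    # Binary toy data; TSVM expects labels in {-1, +1}
    X, Y = make_classification(n_samples=200, n_features=10, random_state=0)
    Y = np.where(Y == 0, -1, 1)

    # Treat half of the data as labeled (X1, Y1) and the rest as unlabeled (X2)
    X1, X2, Y1, Y2_true = train_test_split(X, Y, test_size=0.5, random_state=0)

    model = TSVM()
    model.initial(kernel='linear')
    model.train(X1, Y1, X2)

    # Score on the unlabeled half, whose true labels were held back
    accuracy = model.score(X2, Y2_true)

    # Save, then reload into a fresh instance
    model.save('./tsvm_demo.model')
    model2 = TSVM()
    model2.load('./tsvm_demo.model')
    Y_hat = model2.predict(X2)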
    