Python: Laplacian-Regularized Logistic Regression

'''
author: Deniu He
date: 2020-09-26
'''

import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from scipy.spatial.distance import pdist, squareform
import cvxpy as cvx


class LR():
    def __init__(self, X, y, X_pool, y_pool):
        self.X = X
        self.y = np.vstack(y)                    # labels as a column vector
        self.N = self.X.shape[0]
        print("Number of labeled instances =", self.N)
        self.d = self.X.shape[1]
        ### Unlabeled pool used to build the graph Laplacian ---------
        self.X_pool = X_pool
        self.y_pool = y_pool
        self.poolN = self.X_pool.shape[0]
        print("Total number of instances =", self.poolN)
        self.neiNum = 5                          # k in the k-NN graph
        self.dist_matrix = squareform(pdist(X=self.X_pool, metric='euclidean'))
        self.nn_matrix = np.zeros((self.poolN, self.neiNum))
        self.adj_matrix = self.adjacency_matrix()
        self.lap_matrix = self.laplace_matrix()
        ### Regularization weights ------------------------------------
        self.lambd1 = 0.1
        self.lambd2 = 0.1
        self.w = cvx.Variable((self.d, 1))
        self.f = self.f_vector()                 # f = X_pool @ w, linear scores on the pool
        ### Objective: log-likelihood - Laplacian term - L1 term ------
        self.loglikelihood = cvx.sum(cvx.multiply(self.y, self.X @ self.w) - cvx.logistic(self.X @ self.w))
        self.L1regu = self.lambd1 * cvx.norm(self.w, 1)
        self.Lapregu = self.lambd2 * cvx.quad_form(self.f, self.lap_matrix)  # f^T L f
        self.obj = cvx.Maximize(self.loglikelihood - self.Lapregu - self.L1regu)
        # self.obj = cvx.Maximize(self.loglikelihood - self.Lapregu)
        self.prob = cvx.Problem(self.obj)
        self.prob.solve()
        ### --------------------------------------------------------
        print(self.w.value)
    def adjacency_matrix(self):
        # k-nearest-neighbor lists, excluding each point itself
        for i in range(self.poolN):
            ord_idx = np.argsort(self.dist_matrix[i])
            self.nn_matrix[i] = ord_idx[ord_idx != i][:self.neiNum]
        # symmetric adjacency: connect i and j if either is a k-NN of the other
        adj_matrix = np.zeros((self.poolN, self.poolN))
        for i in range(self.poolN - 1):
            for j in range(i + 1, self.poolN):
                if i in self.nn_matrix[j] or j in self.nn_matrix[i]:
                    adj_matrix[i, j] = 1
                    adj_matrix[j, i] = 1
        return adj_matrix

    def laplace_matrix(self):
        colum_sum = np.sum(self.adj_matrix, 0)   # node degrees (column sums)
        D = np.diag(colum_sum)                   # degree matrix
        lap_matrix = D - self.adj_matrix         # unnormalized graph Laplacian L = D - A
        return lap_matrix

    def f_vector(self):
        f = self.X_pool @ self.w                 # f = Xw over the whole pool
        return f

    def pred_prob(self, X_test):
        # sigmoid of the learned linear scores
        prob = 1 / (1 + np.exp(-(X_test @ self.w.value)))
        return prob

    def pred(self, X_test):
        # threshold the predicted probabilities at 0.5
        return (self.pred_prob(X_test) >= 0.5).astype(int).ravel()


if __name__ == '__main__':
    X, y = datasets.make_blobs(n_samples=500, n_features=2, centers=2, cluster_std=[3, 3], random_state=123)
    # X, y = datasets.load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)
    print("Number of test instances =", len(y_test))
    X_unlabeled, X_labeled, y_unlabeled, y_labeled = train_test_split(X_train, y_train, test_size=0.02, random_state=0)
    plt.scatter(X[:, 0], X[:, 1], c=y)
    plt.scatter(X_labeled[:, 0], X_labeled[:, 1], c='r', marker="*", s=200, label="Labeled instance")
    plt.legend()
    plt.show()

    model = LR(X=X_labeled, y=y_labeled, X_pool=X_train, y_pool=y_train)
    prob = model.pred_prob(X_test=X_test)
    y_pred = model.pred(X_test=X_test)
    Acc = accuracy_score(y_true=y_test, y_pred=y_pred)
    print("Laplacian-regularized logistic regression =", Acc)
    ##### Baseline below: sklearn's off-the-shelf logistic regression.
    model_lr = LogisticRegression()
    model_lr.fit(X=X_labeled, y=y_labeled)
    ypred = model_lr.predict(X=X_test)
    acc = accuracy_score(y_true=y_test, y_pred=ypred)
    print("sklearn logistic regression =", acc)

Honestly, this feels like garbage! The benefit just doesn't show.

The classification accuracy comes out exactly the same as calling sklearn's logistic regression, and on top of that there is an extra hyperparameter to tune! Garbage!

Note: the Laplacian regularizer used above is

LapRegu = f^T L f, where f = Xw.

Some papers use the predicted probabilities instead,

LapRegu = P^T L P, with P = \frac{1}{1 + e^{-Xw}},

but that choice clearly makes the Laplacian regularizer non-convex. That is why the linear-score form above is used here.
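As a quick sanity check, here is a minimal sketch (a hypothetical one-dimensional example, not part of the original code) showing numerically that the probability-based regularizer violates convexity, while cvxpy's DCP analysis certifies the quadratic form f^T L f as convex:

```python
import numpy as np
import cvxpy as cvx

# Hypothetical toy case: with L = [[1]] and a single feature x = 1, the
# probability-based regularizer reduces to g(w) = sigmoid(w)^2.
sigmoid = lambda z: 1 / (1 + np.exp(-z))
g = lambda w: sigmoid(w) ** 2

# Midpoint (Jensen) test: a convex g must satisfy g(mid) <= average.
w1, w2 = 2.0, 6.0
mid = (w1 + w2) / 2
print(g(mid), (g(w1) + g(w2)) / 2)   # 0.964... > 0.885..., so g is not convex

# The linear-score version is a PSD quadratic form in an affine map of w,
# which cvxpy recognizes as convex.
X = np.ones((1, 1))
L = np.array([[1.0]])
w = cvx.Variable((1, 1))
print(cvx.quad_form(X @ w, L).curvature)   # CONVEX
```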
