Python：拉普拉斯正则逻辑回归

最新推荐文章于 2022-07-01 15:33:34 发布

DeniuHe

最新推荐文章于 2022-07-01 15:33:34 发布

阅读量402

点赞数

分类专栏： Python学习算法

本文链接：https://blog.csdn.net/DeniuHe/article/details/108918248

版权

Python学习同时被 2 个专栏收录

239 篇文章 14 订阅

订阅专栏

算法

193 篇文章 2 订阅

订阅专栏

'''
author: Deniu He
date:2020-09-30
'''
from copy import deepcopy
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from scipy.spatial.distance import pdist,squareform
import cvxpy as cvx


class LRLR():
    """Laplacian-regularised logistic regression for binary (0/1) labels.

    ``fit`` solves the following concave problem with cvxpy::

        maximise   sum_i [ y_i * (x_i . w) - log(1 + exp(x_i . w)) ]
                   - lambd1 * ||w||_2 ** 2
                   - lambd2 * (X_pool w)^T L (X_pool w)

    The sum runs over the labelled rows, and ``L = D - A`` is the graph
    Laplacian built from the adjacency matrix ``A`` of the pool samples.
    The model has no intercept term: scores are ``X @ w``.

    Parameters
    ----------
    X_labeled, y_labeled, adjMatrix, X_pool :
        Accepted only so that existing call sites keep working; the
        constructor does not use them. Pass the data to :meth:`fit`.
    lambd1 : float, default 0.1
        Weight of the squared L2 penalty on ``w``.
    lambd2 : float, default 0.1
        Weight of the Laplacian (manifold smoothness) penalty.
    """

    def __init__(self, X_labeled=None, y_labeled=None ,adjMatrix=None, X_pool=None,
                 lambd1=0.1, lambd2=0.1):
        # Labelled data (set by fit).
        self.X = None
        self.y = None
        self.d = None
        # Pool data and its neighbourhood graph (set by fit).
        self.X_pool = None
        self.adjMatrix = None
        self.N = None
        self.lapMatrix = None
        # Regularisation weights and the cvxpy decision variable.
        self.lambd1 = lambd1
        self.lambd2 = lambd2
        self.w = None
        self.P = None
        # cvxpy expressions making up the objective (set by fit).
        self.loglikehood = None
        self.L2regu = None
        self.Lapregu = None
        self.obj = None
        # Value returned by ``Problem.solve()`` (the optimal objective).
        self.prob = None

    def fit(self,X_labeled,y_labeled,adjMatrix,X_pool):
        """Estimate ``w`` from labelled samples plus the pool graph.

        Parameters
        ----------
        X_labeled : array of shape (n_labeled, d)
        y_labeled : array-like with n_labeled entries, encoded as 0/1
        adjMatrix : array of shape (n_pool, n_pool)
            Adjacency matrix of the graph over the rows of ``X_pool``.
        X_pool : array of shape (n_pool, d)

        Raises
        ------
        ValueError
            If the number of labels does not match ``X_labeled``, if the
            feature dimensions of ``X_labeled`` and ``X_pool`` differ, or
            if ``adjMatrix`` is not ``(n_pool, n_pool)``.
        """
        labels = np.asarray(y_labeled).reshape(-1, 1)
        if labels.shape[0] != X_labeled.shape[0]:
            raise ValueError(
                "y_labeled has %d entries but X_labeled has %d rows"
                % (labels.shape[0], X_labeled.shape[0]))
        if X_pool.shape[1] != X_labeled.shape[1]:
            raise ValueError("X_labeled and X_pool must have the same number of features")
        n_pool = X_pool.shape[0]
        if tuple(adjMatrix.shape) != (n_pool, n_pool):
            raise ValueError(
                "adjMatrix must have shape (%d, %d) to match X_pool, got %s"
                % (n_pool, n_pool, tuple(adjMatrix.shape)))

        self.X = X_labeled
        self.y = labels
        self.d = X_labeled.shape[1]
        ###-------------------------------
        self.X_pool = X_pool
        self.adjMatrix = adjMatrix
        self.N = n_pool
        # Unnormalised graph Laplacian L = D - A (degrees taken as column sums).
        self.lapMatrix = np.diag(np.sum(adjMatrix,0)) - adjMatrix
        ###------------------------------
        # ``None`` falls back to 0.1, the value this method used to hard-code.
        if self.lambd1 is None:
            self.lambd1 = 0.1
        if self.lambd2 is None:
            self.lambd2 = 0.1
        self.w = cvx.Variable((self.d, 1))
        self.P = self.X_pool @ self.w
        ###------------------------------
        scores = self.X @ self.w
        # Bernoulli log-likelihood for 0/1 labels; cvx.logistic(z) = log(1 + exp(z)).
        self.loglikehood = cvx.sum(cvx.multiply(self.y, scores) - cvx.logistic(scores))
        self.L2regu = self.lambd1 * cvx.pnorm(self.w, p=2)**2
        self.Lapregu = self.lambd2 * cvx.quad_form(self.P,self.lapMatrix)
        self.obj = cvx.Maximize(self.loglikehood - self.Lapregu - self.L2regu)
        self.prob = cvx.Problem(self.obj).solve()

    def _weights(self):
        """Return the fitted weight vector as a NumPy array, or raise if absent."""
        if self.w is None or self.w.value is None:
            raise RuntimeError(
                "LRLR has no fitted weights: call fit() first and make sure "
                "the solver returned a solution")
        return np.asarray(self.w.value)

    def pred_prob(self,X_test):
        """Return P(y = 1 | x) for each row of ``X_test`` as an (n, 1) array.

        Raises
        ------
        RuntimeError
            If the model has not been fitted (or the solver found no solution).
        """
        scores = np.asarray(X_test) @ self._weights()
        # exp() may overflow to inf for very negative scores; 1 / (1 + inf)
        # is the correct limit 0, so the overflow warning is just noise.
        with np.errstate(over='ignore'):
            prob = 1 / (1 + np.exp(-scores))
        return prob

    def pred(self, X_test):
        """Return hard 0/1 labels (1 when the probability is >= 0.5), shape (n,)."""
        return (self.pred_prob(X_test).ravel() >= 0.5).astype(int)

def adjacency_matrix(X, n_neighbors=5):
    """Build a symmetric k-nearest-neighbour adjacency matrix for the rows of X.

    Two samples are connected when either one is among the other's
    ``n_neighbors`` nearest samples under the Euclidean distance.

    Parameters
    ----------
    X : array of shape (n, d)
        Sample matrix, one row per point.
    n_neighbors : int, default 5
        Number of neighbours per point; clamped to ``n - 1`` when there are
        fewer other points available.

    Returns
    -------
    numpy.ndarray of shape (n, n)
        Float matrix of 0/1 entries, symmetric, with a zero diagonal.
    """
    X = np.asarray(X, dtype=float)
    n = X.shape[0]
    adj_matrix = np.zeros((n, n))
    k = min(n_neighbors, n - 1)
    if k <= 0:
        return adj_matrix

    dist = squareform(pdist(X, metric='euclidean'))
    # Exclude each point from its own neighbour list explicitly. Relying on
    # "self sorts first" breaks when duplicate points tie at distance 0.
    np.fill_diagonal(dist, np.inf)
    # Stable sort keeps the choice among equidistant neighbours deterministic.
    nn_idx = np.argsort(dist, axis=1, kind='stable')[:, :k]

    rows = np.repeat(np.arange(n), k)
    adj_matrix[rows, nn_idx.ravel()] = 1
    # Union symmetrisation: keep an edge if either endpoint selected the other.
    return np.maximum(adj_matrix, adj_matrix.T)


if __name__ == '__main__':
    # Two Gaussian blobs; test_size=0.992 keeps 99.2% of the samples as
    # test data, so only a handful of points are labelled.
    X, y = datasets.make_blobs(n_samples=500, n_features=2, centers=2, cluster_std=[3, 3], random_state=123)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.992,random_state=1)
    print("测试数据个数=",len(y_test))

    # Show the whole pool coloured by class, with labelled points as red stars.
    plt.scatter(X[:,0],X[:,1],c=y)
    plt.scatter(X_train[:,0],X_train[:,1],c='r',marker="*",s=200,label="Labeled instance")
    plt.legend()
    plt.show()

    # Neighbourhood graph over all samples (labelled and unlabelled).
    adjMatrix = adjacency_matrix(X=X)

    # Laplacian-regularised model: labelled points plus the graph over the pool.
    model = LRLR()
    model.fit(X_labeled=X_train,y_labeled=y_train,adjMatrix=adjMatrix,X_pool=X)
    y_pred = model.pred(X_test=X_test)
    acc = accuracy_score(y_true=y_test,y_pred=y_pred)
    print("精度：",acc)

    # Baseline: scikit-learn logistic regression on the labelled points only.
    model2 = LogisticRegression()
    model2.fit(X=X_train,y=y_train)
    y_pred2 = model2.predict(X=X_test)
    acc2 = accuracy_score(y_true=y_test,y_pred=y_pred2)
    print("调包精度：",acc2)