'''
author: Deniu He
date: 2020-09-26
'''
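# Semi-supervised logistic regression with a graph-Laplacian (manifold)
# regularizer, fitted as a convex program with CVXPY and compared against
# plain sklearn logistic regression on a two-blob toy dataset.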
import numpy as np
import matplotlib.pyplot as plt
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.linear_model import LogisticRegression
from scipy.spatial.distance import pdist,squareform
import cvxpy as cvx
class LR():
    def __init__(self, X, y, X_pool, y_pool):
        self.X = X
        self.y = np.vstack(y)                       # labels as an (N, 1) column
        self.N = self.X.shape[0]
        print("Number of labeled instances =", self.N)
        self.d = self.X.shape[1]
        ###--------------------------------
        self.X_pool = X_pool
        self.y_pool = y_pool
        self.poolN = self.X_pool.shape[0]
        print("Number of pool instances =", self.poolN)
        self.neiNum = 5                             # k for the kNN graph
        self.dist_matrix = squareform(pdist(X=self.X_pool, metric='euclidean'))
        self.nn_matrix = np.zeros((self.poolN, self.neiNum))
        self.adj_matrix = self.adjacency_matrix()
        self.lap_matrix = self.laplace_matrix()
        ####------------------------------
        self.lambd1 = 0.1                           # weight of the L1 penalty
        self.lambd2 = 0.1                           # weight of the Laplacian penalty
        self.w = cvx.Variable((self.d, 1))
        self.P = self.P_matrix()                    # pool scores X_pool @ w, affine in w
        ####-----------------------------
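        # Objective (maximized over w), with L the graph Laplacian of the pool:
        #   sum_i [ y_i x_i^T w - log(1 + exp(x_i^T w)) ]
        #     - lambd2 * (X_pool w)^T L (X_pool w) - lambd1 * ||w||_1
        # The Laplacian term pulls the linear scores of graph-adjacent pool
        # points toward each other (manifold smoothness).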
        self.loglikelihood = cvx.sum(cvx.multiply(self.y, self.X @ self.w) - cvx.logistic(self.X @ self.w))
        self.L1regu = self.lambd1 * cvx.norm(self.w, 1)
        self.Lapregu = self.lambd2 * cvx.quad_form(self.P, self.lap_matrix)
        self.obj = cvx.Maximize(self.loglikelihood - self.Lapregu - self.L1regu)
        # self.obj = cvx.Maximize(self.loglikelihood - self.Lapregu)
        self.prob = cvx.Problem(self.obj)
        self.prob.solve()
        ####------------------------------
        print(self.w.value)
    def adjacency_matrix(self):
        # collect the neiNum nearest neighbors of each pool point (self excluded)
        for i in range(self.poolN):
            ord_idx = np.argsort(self.dist_matrix[i])
            neibor = []
            for j in ord_idx:
                if j != i:
                    neibor.append(j)
                if len(neibor) == self.neiNum:
                    break
            self.nn_matrix[i] = np.array(neibor)
        # symmetrized kNN graph: connect i and j if either is a neighbor of the other
        adj_matrix = np.zeros((self.poolN, self.poolN))
        for i in range(self.poolN - 1):
            for j in range(i + 1, self.poolN):
                if i in self.nn_matrix[j] or j in self.nn_matrix[i]:
                    adj_matrix[i, j] = 1
                    adj_matrix[j, i] = 1
        return adj_matrix
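    # For the unnormalized Laplacian L = D - A of an undirected graph,
    #   f^T L f = (1/2) * sum_{i,j} A[i,j] * (f_i - f_j)^2,
    # so minimizing the quadratic form smooths f along the graph edges.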
    def laplace_matrix(self):
        colum_sum = np.sum(self.adj_matrix, axis=0)  # column sums = node degrees
        D = np.diag(colum_sum)                       # diagonal degree matrix
        lap_matrix = D - self.adj_matrix
        return lap_matrix
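    # Every such Laplacian is symmetric positive semidefinite, which is what
    # makes cvx.quad_form(P, lap_matrix) a convex term (concave once negated
    # inside the maximization objective).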
    def P_matrix(self):
        # linear scores of the whole pool; the Laplacian term is applied to
        # these scores rather than to probabilities, which keeps it convex
        P = self.X_pool @ self.w
        return P
    def pred_prob(self, X_test):
        # sigmoid of the learned linear scores
        return 1 / (1 + np.exp(-(X_test @ self.w.value)))

    def pred(self, X_test):
        # threshold the predicted probabilities at 0.5
        return (self.pred_prob(X_test).ravel() >= 0.5).astype(int)
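
# A minimal sanity-check sketch (this helper is not part of the original
# script): it verifies the identity f^T L f = 0.5 * sum_{i,j} A[i,j] (f_i - f_j)^2
# that justifies the Laplacian smoothness term used above.
def check_laplacian_identity(f, adj_matrix):
    D = np.diag(adj_matrix.sum(axis=0))   # diagonal degree matrix
    L = D - adj_matrix                    # unnormalized graph Laplacian
    lhs = float(f @ L @ f)                # quadratic form f^T L f
    rhs = 0.5 * float(np.sum(adj_matrix * (f[:, None] - f[None, :]) ** 2))
    return np.isclose(lhs, rhs)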
if __name__ == '__main__':
    X, y = datasets.make_blobs(n_samples=500, n_features=2, centers=2, cluster_std=[3, 3], random_state=123)
    # X, y = datasets.load_iris(return_X_y=True)
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.5, random_state=0)
    print("Number of test instances =", len(y_test))
    # keep only 2% of the training data as labeled; the rest acts as the unlabeled pool
    X_unlabeled, X_labeled, y_unlabeled, y_labeled = train_test_split(X_train, y_train, test_size=0.02, random_state=0)
    plt.scatter(X[:, 0], X[:, 1], c=y)
    plt.scatter(X_labeled[:, 0], X_labeled[:, 1], c='r', marker="*", s=200, label="Labeled instance")
    plt.legend()
    plt.show()
    model = LR(X=X_labeled, y=y_labeled, X_pool=X_train, y_pool=y_train)
    prob = model.pred_prob(X_test=X_test)
    y_pred = model.pred(X_test=X_test)
    Acc = accuracy_score(y_true=y_test, y_pred=y_pred)
    print("Laplacian-regularized logistic regression accuracy =", Acc)
    #####----------------- Baseline below: plain logistic regression from sklearn.
    model_lr = LogisticRegression()
    model_lr.fit(X=X_labeled, y=y_labeled)
    ypred = model_lr.predict(X=X_test)
    acc = accuracy_score(y_true=y_test, y_pred=ypred)
    print("sklearn logistic regression accuracy =", acc)
# Frustrating: the benefit does not show here. The reported accuracy matched
# plain sklearn logistic regression exactly, and the method adds an extra
# hyperparameter. (The original comparison passed y_pred, the Laplacian
# model's predictions, instead of ypred to accuracy_score, which forced the
# two reported accuracies to be identical.)
# Note: some papers build the Laplacian term from the predicted probabilities,
# which clearly makes the Laplacian regularizer non-convex. That is why the
# Laplacian term above is applied to the linear scores instead.