# Reference: Pilsung Kang, Sungzoon Cho. Locally linear reconstruction for instance-based learning, Pattern Recognition, 2008 (41): 3507-3518
'''
author:Deniu He
organization:CQUPT
date:2021-01-01
Reference:Pilsung Kang, Sungzoon Cho. Locally linear reconstruction for instance-based learning,Pattern Recognition, 2008 (41): 3507-3518
'''
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.neighbors import KNeighborsClassifier
from collections import OrderedDict
import cvxpy as cvx
class KNNLLR():
    """k-NN classifier whose votes are weighted by Locally Linear
    Reconstruction (LLR) coefficients.

    Each test point is reconstructed as a convex combination of its k
    nearest training points; the reconstruction weights (solved as a small
    QP with cvxpy) are then summed per class label to pick the prediction.

    Reference: Pilsung Kang, Sungzoon Cho. "Locally linear reconstruction
    for instance-based learning", Pattern Recognition 41 (2008) 3507-3518.
    """

    def __init__(self, X_train, y_train, X_test, y_test, k):
        """Store the data and precompute the k-NN index table.

        :param X_train: (n_train, d) training features
        :param y_train: (n_train,) training labels
        :param X_test:  (n_test, d) test features
        :param y_test:  (n_test,) test labels (kept for convenience, unused here)
        :param k:       number of neighbours used for reconstruction
        """
        self.X = X_train
        self.y = y_train
        self.X_test = X_test
        self.y_test = y_test
        self.labels = np.unique(y_train)
        self.k = k
        self.d = self.X.shape[1]
        self.n_test = self.X_test.shape[0]
        self.n_train = self.X.shape[0]
        # Pairwise test-to-train Euclidean distances, filled by KNN().
        self.dist_matrix = np.zeros((self.n_test, self.n_train))
        self.k_nn = self.KNN()
        # Reconstruction weights (allocated; populated per-point in LLR()).
        self.w = np.zeros((self.n_test, self.k))
        # Predicted labels, written by LLR().
        self.y_pred = np.zeros(self.n_test)

    def D(self, a, b):
        """Euclidean distance between two 1-D vectors."""
        return np.sqrt(sum((a - b) ** 2))

    def KNN(self):
        """Return an (n_test, k) int array: indices of each test point's
        k nearest training points (ascending distance)."""
        for i in range(self.n_test):
            for j in range(self.n_train):
                self.dist_matrix[i, j] = self.D(self.X[j], self.X_test[i])
        # BUG FIX 1: the original excluded the training index equal to the
        # *test* row number i — a comparison across two unrelated index
        # spaces that silently discarded a valid neighbour. Test and train
        # sets are disjoint here, so no self-exclusion is needed.
        # BUG FIX 2: `np.int` was removed in NumPy 1.24; use plain int dtype.
        knn = np.zeros((self.n_test, self.k), dtype=int)
        for i in range(self.n_test):
            knn[i] = np.argsort(self.dist_matrix[i])[:self.k]
        return knn

    def LLR(self):
        """Solve the LLR weights for every test point and vote on a label.

        For test point x, minimize ||x - N^T w||_2 subject to w >= 0 and
        sum(w) == 1 (N = its k nearest training points), then accumulate
        each neighbour's weight onto its class; predict the argmax class.
        Results are written to self.y_pred.
        """
        for i in range(self.n_test):
            w = cvx.Variable((self.k, 1))
            # Convex-combination constraints: non-negative, sums to 1.
            constraint = [w >= 0, cvx.sum(w, axis=0) == 1]
            obj = cvx.Minimize(
                cvx.norm2(self.X_test[i].reshape(-1, 1) - self.X[self.k_nn[i]].T @ w)
            )
            cvx.Problem(obj, constraint).solve()
            weight = np.hstack(w.value)
            # Weighted vote: sum reconstruction weights per class label.
            Eval = OrderedDict()
            for lab in self.labels:
                Eval[lab] = 0
            for j, idx in enumerate(self.k_nn[i]):
                Eval[self.y[idx]] += weight[j]
            self.y_pred[i] = max(Eval, key=Eval.get)
if __name__ == '__main__':
    # Compare LLR-weighted k-NN against sklearn's plain k-NN on iris,
    # averaged over 100 random 80/20 splits.
    X, y = datasets.load_iris(return_X_y=True)
    llr_scores = []
    baseline_scores = []
    for trial in range(100):
        print("重复执行第{}次".format(trial))
        X_tr, X_te, y_tr, y_te = train_test_split(X, y, test_size=0.2)

        # LLR-weighted k-NN.
        llr = KNNLLR(X_tr, y_tr, X_te, y_te, k=50)
        llr.LLR()
        llr_scores.append(accuracy_score(llr.y_pred, y_te))

        # Plain sklearn k-NN baseline with the same k.
        baseline = KNeighborsClassifier(n_neighbors=50)
        baseline.fit(X_tr, y_tr)
        baseline_scores.append(accuracy_score(y_true=y_te, y_pred=baseline.predict(X_te)))

    print("LLR_Acc:", np.mean(llr_scores))
    print("调包Acc:", np.mean(baseline_scores))
# --- Author's closing notes (commented out: bare prose breaks the module) ---
# 在k较大的时候,该算法是可以保持一定优势的。
# 但在k在个位数的时候呢,呵呵!
# (Translation: when k is large this algorithm keeps a certain advantage;
#  when k is in the single digits, well... not so much.)
#
# 模糊粗糙k近邻分类器中,权重的定义如下:
# (Translation: in the fuzzy-rough k-NN classifier, the weights are defined
#  as follows — illustrative snippet:)
#
#   import numpy as np
#   a = np.array([1, 2, 3, 2, 5])
#   print(a**2)
#   print()
#   print(sum(a**2))
#   print(sum(a**2) / (a**2))
#
# 大家trade off 一下吧,看看哪个更好用。
# (Translation: trade off for yourselves and see which works better.)