Effective Discriminative Feature Selection with Non-trivial Solutions(具有非平凡解的有效判别特征选择) 代码 Python

最近着手于动手复现这篇文章的算法,但是遇到了一些问题,在求广义特征值问题时遇到了矩阵不可逆的情况,论文里提到可以添加一个正则化项,但是效果不太好,不知是否是理解错误。如果存在一些问题,可以一起讨论讨论。个人邮箱:daweizhu2023@163.com,QQ:1765900518

Accuracy: 0.2727272727272727

后来决定在广义特征值问题中,对B矩阵的对角线上加了一个小的正数,A矩阵保持原来的样子。此外,在KNN分类前,主要将特征选择更改为了特征变换,尽管这一修改与原文描述的情况相反(原文基于投影矩阵选择数据矩阵维度的下标,实现特征选择),但是效果异常的好,基本符合文献中实验数据,所以对于这部分修改还需要讨论。

Accuracy: 0.9416666666666667

import numpy as np
import scipy.io as sio
import scipy.linalg as sl
import pandas as pd

'''
一共就有六步
输入:数据矩阵X,标签信息Y,参数γ,l,p
输出:A
'''


def DFS(X, y, gama=0.0001, l=50, p=1):
    n_class = np.size(np.unique(y))
    n_samples, dimension = X.shape[0], X.shape[1]
    print("the number of calss: ", np.size(np.unique(y)))

    mean_vectors = []
    for cl in range(1, n_class):
        mean_vectors.append(np.mean(X[np.ravel(y == cl), :], axis=0))
        # print('Mean Vector class %s: %s\n' % (cl, mean_vectors[cl - 1]))

    overall_mean = np.mean(X, axis=0)

    '''计算类间散度矩阵'''
    S_B = np.zeros((dimension, dimension))
    for i, mean_vec in enumerate(mean_vectors):
        n = X[np.ravel(y == i + 1), :].shape[0]
        mean_vec = mean_vec.reshape(dimension, 1)  # make column vector
        overall_mean = overall_mean.reshape(dimension, 1)  # make column vector
        S_B += n * (mean_vec - overall_mean).dot((mean_vec - overall_mean).T)
    # print('between-class Scatter Matrix:\n', S_B)

    '''计算类内散度矩阵'''
    S_W = np.zeros((dimension, dimension))
    for cl, mv in zip(range(1, dimension), mean_vectors):
        class_sc_mat = np.zeros((dimension, dimension))  # scatter matrix for every class
        for row in X[np.ravel(y == cl)]:
            row, mv = row.reshape(dimension, 1), mv.reshape(dimension, 1)  # make column vectors
            class_sc_mat += (row - mv).dot((row - mv).T)
        S_W += class_sc_mat  # sum class scatter matrices
    # print('within-class Scatter Matrix:\n', S_W)

    '''计算总散度矩阵'''
    S_T = S_B + S_W

    '''初始化'''
    K = 0
    D_k = np.eye(dimension)
    A = np.zeros((dimension, l))
    last_A = np.zeros((dimension, l))
    Div_k = []

    while True:
        # 给出了广义特征值求解函数,Ax=λBx,scipy.linalg.eig(A, B)
        print("前项行列式:", np.linalg.det((gama * D_k - S_B) + 0.7 * np.eye(S_T.shape[0])))  # 0.0
        print("后项行列式:", np.linalg.det(S_T + 0.1 * np.eye(S_T.shape[0])))  # -0.0
        eigenvalues, eigenvectors = sl.eigh(a=(gama * D_k - S_B),
                                            b=S_T + 0.1 * np.eye(S_T.shape[0]))
        eigenvalues = np.real(eigenvalues)
        # print("特征值:", eigenvalues)
        # print("特征向量:", eigenvectors)

        '''更改策略,利用正则化将B转为可逆矩阵,然后将广义特征值问题转为普通特征值问题。'''
        # eigenvalues, eigenvectors = np.linalg.eig(np.linalg.inv(S_T + 0.1 * np.eye(S_T.shape[0])) @ (gama * D_k - S_B))
        # print("+++")

        '''初始化'''
        sorted_indexs = np.argsort(eigenvalues)  # 默认从小到大
        A = eigenvectors[:, sorted_indexs[0:l]]

        '''计算对角矩阵D_k'''
        norm_array = np.linalg.norm(A, axis=1)
        D_k = np.diag((p / 2) * np.power(norm_array, (p - 2)))

        Div_k.append(np.sum(np.abs(np.linalg.norm(A, axis=1) - np.linalg.norm(last_A, axis=1))))

        print("第 " + str(K) + "次迭代\n")
        print("迭代数组:", Div_k)
        if K > 0 and abs(Div_k[K] - Div_k[K - 1]) < 0.0001:
            break

        last_A = A
        K = K + 1

    '''
        借助文献提供的评分机制,对特征进行排序
        根据A行2范数的大小排序,获取最大的值的下标
        '''
    selected_features = []
    norm_vector = np.linalg.norm(A, axis=1)
    norm_vector_ordered = np.argsort(-norm_vector)
    selected_features = norm_vector_ordered[0:l]
    # selected_data = X[:, selected_features]  # n_samples, dimension
    return A, selected_features


if __name__ == '__main__':
    data = sio.loadmat(
        "D:\PythonProjects\Projects\DiscriminantFeatureSelection\experiment\data\ORL\ORL_32x32.mat")
    X = data.get("fea")
    Y = data.get("gnd")

    '''划分数据集:训练集和测试集'''
    from sklearn.model_selection import train_test_split

    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size=0.3)

    '''需要看原文,是否需要进行数据预处理?'''

    A, selected_features = DFS(X_train, y_train, gama=0.0001, l=39, p=1)
    df = pd.DataFrame(A)
    df.to_csv('D:\PythonProjects\Projects\DiscriminantFeatureSelection\experiment\contrast\Methods\DFS\DFS_pre2.csv',
              index=False, header=False)
    print('Projection matrix:\n', A)

    X_train_reduced = X_train[:, selected_features]  # n_samples, dimension
    X_test_reduced = X_test[:, selected_features]

    '''可视化一番'''

    '''调用分类算法'''
    from sklearn.neighbors import KNeighborsClassifier
    from sklearn.metrics import accuracy_score

    # 创建knn分类器对象
    # knn = KNeighborsClassifier(n_neighbors=3)
    # knn.fit(X_train_reduced, np.ravel(y_train))
    # y_pred = knn.predict(X_test_reduced)
    # accuracy = accuracy_score(y_test, y_pred.reshape(y_test.shape))
    # print(f"Accuracy: {accuracy}")
    # print("+++")

    X_train_reduced2 = X_train @ A  # n_samples, dimension
    X_test_reduced2 = X_test @ A

    # 创建knn分类器对象2  这个对了,非常之奇怪,可能基础就好,文献描述都写地方没有理解
    knn2 = KNeighborsClassifier(n_neighbors=3)
    knn2.fit(X_train_reduced2, np.ravel(y_train))
    y_pred2 = knn2.predict(X_test_reduced2)
    accuracy2 = accuracy_score(y_test, y_pred2.reshape(y_test.shape))
    print(f"Accuracy: {accuracy2}")
    print("+++")

  • 2
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值