最近在看一本名为《机器学习公式推导与代码实现》的书，书中有用 NumPy 实现的 LDA（线性判别分析）算法，具体 Python 代码如下：
import numpy as np
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
# 定义LDA类
class LDA(object):
def __init__(self):
# 初始化权重矩阵
self.w = None
# 协方差矩阵计算方法
def calc_cov(self, X, Y=None):
    """Return the (cross-)covariance of column-standardized data.

    Every column is centered on its mean and scaled by its population
    standard deviation before the product is taken, so the result is
    computed on standardized values rather than on the raw inputs.

    Args:
        X: Array whose rows are samples (assumed 2-D, samples x features).
        Y: Optional second array with the same number of rows as ``X``.
            When omitted, the standardized ``X`` is used on both sides.

    Returns:
        ``X_std.T @ Y_std / m`` where ``m`` is the number of rows of ``X``.
    """
    def _standardize(data):
        # Center each column, then scale it by its standard deviation.
        centered = data - np.mean(data, axis=0)
        scale = np.std(data, axis=0)
        # A constant column has zero spread; dividing by it would turn the
        # whole result into NaN. Such columns are left centered (all zeros).
        scale = np.where(scale == 0, 1.0, scale)
        return centered / scale

    m = X.shape[0]
    X = _standardize(X)
    Y = X if Y is None else _standardize(Y)
    return 1 / m * np.matmul(X.T, Y)
# 数据投影方法
def project(self, X, y):
    """Fit the model on ``(X, y)`` and return ``X`` projected by the weights.

    Args:
        X: Sample array; it must provide a ``dot`` method.
        y: Labels passed through to ``fit``.

    Returns:
        The result of ``X.dot(self.w)``.
    """
    # The model is refit on every call; fit is expected to set self.w.
    self.fit(X, y)
    weights = self.w
    return X.dot(weights)
# LDA拟合方法
def fit(self, X, y):
# (1)按类分组
X0 = X[y == 0]
X1 = X[y == 1]
# (2)分别计算两类数据自变量的协方差矩阵
sigma0 = self.calc_cov(X0)
sigma1