# 从零开始实现线性判别分析(LDA)算法(多类情形)

http://blog.csdn.net/u013719780?viewmode=contents

$$W = (w_1, w_2, \dots, w_k), \qquad w_i\ (i=1, 2, \dots, k)\ \text{是列向量},$$

$$y_i = w_i^T x, \qquad y = W^T x,$$

将数据集 $D$ 按照类别标签划分为 $n_{labels}$ 类 $D_1, D_2, \dots, D_{n_{labels}}$，其中 $D_1 \cup D_2 \cup \dots \cup D_{n_{labels}} = D$，$D_1 \cap D_2 \cap \dots \cap D_{n_{labels}} = \emptyset$。

$$\mu_i = \frac{1}{n_i} \sum_{x^{(i)} \in D_i} x^{(i)}, \tag{1}$$

$$\mu = \frac{1}{m} \sum_{x^{(i)} \in D} x^{(i)}, \tag{2}$$

$$S_w = \sum_{i=1}^{n_{labels}} \sum_{x^{(k)} \in D_i} (x^{(k)} - \mu_i)(x^{(k)} - \mu_i)^T, \tag{3}$$

$$S_B = \sum_{i=1}^{n_{labels}} n_i (\mu_i - \mu)(\mu_i - \mu)^T, \tag{4}$$

$$S_B = \sum_{i=1}^{n_{labels}} p(i) (\mu_i - \mu)(\mu_i - \mu)^T,$$

$$S_w = \sum_{i=1}^{n_{labels}} \frac{p(i)}{n_i} \sum_{x^{(k)} \in D_i} (x^{(k)} - \mu_i)(x^{(k)} - \mu_i)^T = \sum_{i=1}^{n_{labels}} p(i)\, E\left\{ (x^{(k)} - \mu_i)(x^{(k)} - \mu_i)^T \,\middle|\, x^{(k)} \in D_i \right\}.$$

LDA做为一个分类的算法，我们当然希望它所分的类之间耦合度低，类内的聚合度高，即类内离散度矩阵的中的数值要小，而类间离散度矩阵中的数值要大，这样的分类的效果才好。

$$J(W) = \frac{|W^T S_B W|}{|W^T S_w W|} \tag{5}$$

$$W^* = \arg\max_W \frac{|W^T S_B W|}{|W^T S_w W|} \tag{6}$$

$$S_B W = \lambda S_w W, \tag{7}$$

$$W^* = \arg\max |\lambda|$$

### 算法流程

• 计算类内散度矩阵 $S_w$ 和类间散度矩阵 $S_B$；

• 计算矩阵 $S_w^{-1} S_B$ 的特征值；

• 找出矩阵 $S_w^{-1} S_B$ 最大的 $k$ 个特征值和其对应的 $k$ 个特征向量 $(w_1, w_2, \dots, w_k)$；

• 将原始样本集投影到以 $(w_1, w_2, \dots, w_k)$ 为基向量生成的低维空间中（$k$ 维），投影后的样本集就是我们需要的样本集 $D'$。

### LDA算法总结

LDA算法既可以用来降维，又可以用来分类，但是目前来说，主要还是用于降维。在我们进行图像识别相关的数据分析时，LDA是一个有力的工具。下面总结一下LDA算法的优缺点。

LDA算法的主要优点有：

• LDA是监督学习，在分类和降维过程中利用了类别的先验知识，而像PCA这样的无监督学习则无法使用类别先验知识。

• LDA在样本分类信息依赖均值而不是方差的时候，比PCA之类的降维算法效果更好。

• LDA至多投影到 $n_{labels} - 1$ 维子空间；

• LDA不适合对非高斯分布样本进行降维；

• LDA在样本分类信息依赖方差而不是均值时，效果不好；

• LDA可能过度拟合数据；

• LDA在非线性情形效果不好。

### LDA的一些变种

1、 非参数LDA

2、 正交LDA

3、 一般化LDA

4、 核函数LDA

from __future__ import print_function
import scipy
from sklearn import datasets
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np

def shuffle_data(X, y, seed=None):
    """Shuffle samples X and labels y in unison.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
        Feature matrix.
    y : ndarray of shape (n_samples,)
        Label vector, shuffled with the same permutation as X.
    seed : int or None
        Optional seed for reproducible shuffles.

    Returns
    -------
    tuple of ndarray
        (X_shuffled, y_shuffled).
    """
    # BUG FIX: `if seed:` silently skipped seeding for seed == 0;
    # compare against None so every explicit seed is honored.
    if seed is not None:
        np.random.seed(seed)

    idx = np.arange(X.shape[0])
    np.random.shuffle(idx)

    return X[idx], y[idx]

# L2 (or general Lp) normalization of dataset X
def normalize(X, axis=-1, p=2):
    """Scale X so every slice along `axis` has unit Lp norm.

    Slices with zero norm are left untouched (divided by 1).
    """
    norms = np.atleast_1d(np.linalg.norm(X, p, axis))
    # Substitute 1 for zero norms to avoid division by zero.
    safe_norms = np.where(norms == 0, 1, norms)
    return X / np.expand_dims(safe_norms, axis)

# Standardize dataset X column-wise to zero mean and unit variance
def standardize(X):
    """Return a copy of X with each column standardized.

    Columns with zero standard deviation are left as all zeros
    (their mean equals every entry, so the centered value is 0).

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)

    Returns
    -------
    ndarray of shape (n_samples, n_features)
    """
    X_std = np.zeros(X.shape)
    mean = X.mean(axis=0)
    std = X.std(axis=0)

    # Never divide when a column's std is 0 — that column stays 0.
    for col in range(np.shape(X)[1]):
        if std[col]:
            # BUG FIX: the original centered X_std (all zeros) instead
            # of X, so every non-constant column came out wrong.
            X_std[:, col] = (X[:, col] - mean[col]) / std[col]

    return X_std

# Split a dataset into training and test partitions
def train_test_split(X, y, test_size=0.2, shuffle=True, seed=None):
    """Split (X, y) into train/test subsets.

    Parameters
    ----------
    X, y : ndarray
        Samples and labels, split with the same cut point.
    test_size : float
        Fraction of samples assigned to the test set.
    shuffle : bool
        Whether to shuffle before splitting.
    seed : int or None
        Seed forwarded to shuffle_data when shuffling.

    Returns
    -------
    tuple
        (x_train, x_test, y_train, y_test).
    """
    if shuffle:
        X, y = shuffle_data(X, y, seed)

    split_at = int(X.shape[0] * (1 - test_size))
    return X[:split_at], X[split_at:], y[:split_at], y[split_at:]

def accuracy(y, y_pred):
    """Fraction of positions where y and y_pred agree element-wise."""
    # Reshape both to 2-D columns so comparison is position-wise.
    y = y.reshape(len(y), -1)
    y_pred = y_pred.reshape(len(y_pred), -1)
    matches = np.sum(y == y_pred)
    return matches / len(y)

# Compute the (cross-)covariance matrix of dataset X (ddof=1)
def calculate_covariance_matrix(X, Y=None):
    """Sample covariance of X, or cross-covariance of X with Y.

    Parameters
    ----------
    X : ndarray of shape (n_samples, n_features)
    Y : ndarray of shape (n_samples, m_features) or None
        If None, the covariance of X with itself is returned.

    Returns
    -------
    ndarray of shape (n_features, m_features)

    Notes
    -----
    BUG FIX: the original signaled "no Y" with ``not Y.any()`` on an
    ``np.empty((0,0))`` default, which also fired for a legitimately
    all-zero Y; ``None`` is the unambiguous sentinel.
    """
    if Y is None:
        Y = X
    n_samples = np.shape(X)[0]
    covariance_matrix = (1 / (n_samples - 1)) * (X - X.mean(axis=0)).T.dot(Y - Y.mean(axis=0))

    return np.array(covariance_matrix, dtype=float)

class MultiClassLDA():
    """
    Multi-class Linear Discriminant Analysis (LDA).

    Usable both for classification and for dimensionality reduction;
    this implementation projects samples onto the k leading
    discriminant directions of SW^(-1).dot(SB).

    Parameters:
    -----------
    solver: str
        If 'svd', the pseudo-inverse of SW is used (robust when SW is
        singular); otherwise np.linalg.inv(SW) is used directly.
    eigen_values:
        Eigenvalues of SW^(-1).dot(SB) (populated by `transform`).
    eigen_vectors:
        Corresponding eigenvectors (populated by `transform`).
    k: int
        Dimensionality of the projection (low-dimensional) space.
    """
    def __init__(self, solver="svd"):
        self.solver = solver
        self.eigen_values = None
        self.eigen_vectors = None
        self.k = 2

    def calculate_scatter_matrices(self, X, y):
        """Return (SW, SB): within-class and between-class scatter.

        SW = sum_i sum_{x in D_i} (x - mu_i)(x - mu_i)^T
        SB = sum_i n_i (mu_i - mu)(mu_i - mu)^T
        """
        n_features = np.shape(X)[1]
        labels = np.unique(y)

        # BUG FIX: np.empty returns uninitialized memory, which was
        # then accumulated into; running sums must start at zero.
        SW = np.zeros((n_features, n_features))
        SB = np.zeros((n_features, n_features))
        X_mean = X.mean(axis=0)
        for label in labels:
            Xi = X[y == label]
            Xi_mean = Xi.mean(axis=0)

            # Per-class scatter: centered.T @ centered equals
            # (n_i - 1) * covariance(Xi).
            centered = Xi - Xi_mean
            SW = SW + centered.T.dot(centered)

            # BUG FIX: for 1-D vectors `a.dot(a.T)` is a scalar; the
            # between-class term must be the outer product.
            diff = (Xi_mean - X_mean).reshape(-1, 1)
            SB = SB + Xi.shape[0] * diff.dot(diff.T)
        return SW, SB

    def transform(self, X, y):
        """Project X onto the k leading discriminant directions."""
        SW, SB = self.calculate_scatter_matrices(X, y)

        # Use the SVD-based pseudo-inverse of SW.
        if self.solver == "svd":
            # BUG FIX: the hand-rolled reconstruction used numpy's V
            # (actually Vh) untransposed; np.linalg.pinv performs the
            # same SVD-based computation correctly.
            SW_inverse = np.linalg.pinv(SW)
            A = SW_inverse.dot(SB)

        # Invert SW directly.
        else:
            A = np.linalg.inv(SW).dot(SB)

        # BUG FIX: A = SW^-1 SB is NOT symmetric in general, so eigh
        # (which assumes a Hermitian input) was invalid; use eig.
        self.eigen_values, self.eigen_vectors = np.linalg.eig(A)

        # Sort eigenvalues by magnitude (descending) and keep the top
        # k eigenvectors as the projection basis.
        idx = np.abs(self.eigen_values).argsort()[::-1]
        topk_eigen_vectors = self.eigen_vectors[:, idx][:, :self.k]

        # Drop numerically tiny imaginary parts before projecting.
        X_transformed = X.dot(np.real(topk_eigen_vectors))

        return X_transformed

    # Plot the dataset X and the corresponding labels y in 2D using the LDA
    # transformation.
    def visualization(self, X, y, title=None):
        """Scatter-plot the 2-D LDA projection of X, colored by y."""
        X_transformed = self.transform(X, y)
        x1 = X_transformed[:, 0]
        x2 = X_transformed[:, 1]
        plt.scatter(x1, x2, c=y)
        if title:
            plt.title(title)
        plt.show()

def main():
    """Load iris, L2-normalize it, and visualize its 2-D LDA projection."""
    # BUG FIX: `data` was never defined anywhere in the script; load
    # the iris dataset here via the sklearn import at the top of file.
    data = datasets.load_iris()
    X = normalize(data.data)
    y = data.target

    # Project the data onto the 2 primary discriminant components.
    multi_class_lda = MultiClassLDA()
    multi_class_lda.visualization(X, y, title="LDA")

if __name__ == "__main__":
    main()


08-09

05-09 1万+
07-09 230
10-07 2305
03-17
09-26
10-20 3223
11-06
11-17 8万+