# Linear Discriminant Analysis


The idea behind LDA is to map the data into a lower-dimensional space via a linear transformation (a projection), such that after projection, samples with the same class label lie close together while samples with different class labels lie far apart.

## 1. Linear Discriminant Analysis (Two-Class Case)

### 1. Definition of the Covariance Matrix

Let $x, y$ be two column vectors; the covariance of $x$ and $y$ is

$$\operatorname{cov}(x, y) = E\big[(x - \bar{x})(y - \bar{y})\big].$$

Stacking these pairwise covariances gives the covariance matrix $A = (a_{ij})_{m \times n}$, whose entries are

$$a_{ij} = \operatorname{cov}(X_i, X_j) = E\big[(X_i - \bar{X_i})(X_j - \bar{X_j})\big], \qquad i = 1, \ldots, m, \; j = 1, \ldots, n.$$
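As a quick sanity check, this definition can be computed directly with NumPy and compared against `np.cov` (a minimal sketch; the data here are made up for illustration):

```python
import numpy as np

# Made-up data: 100 samples, 3 features (rows = samples, columns = variables).
rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))

# Sample covariance from the definition: a_ij = E[(X_i - mean_i)(X_j - mean_j)],
# using the unbiased 1/(n-1) normalization to match np.cov's default.
Xc = X - X.mean(axis=0)
A = Xc.T @ Xc / (X.shape[0] - 1)

# np.cov expects variables in rows by default, hence rowvar=False here.
assert np.allclose(A, np.cov(X, rowvar=False))
```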

### 2. Principle of the Algorithm

LDA projects each sample $x$ onto a direction $w$, producing a scalar

$$y = w^T x.$$

Let the training set be

$$D = \{x^{(1)}, x^{(2)}, \ldots, x^{(m)}\},$$

and partition $D$ by class label into two subsets $D_1, D_2$, where $D_1 \cup D_2 = D$ and $D_1 \cap D_2 = \emptyset$.

The class means in the original space are

$$\mu_1 = \frac{1}{n_1} \sum_{x^{(i)} \in D_1} x^{(i)}, \qquad \mu_2 = \frac{1}{n_2} \sum_{x^{(i)} \in D_2} x^{(i)},$$

where $n_1 = |D_1|$ and $n_2 = |D_2|$.

After projection, the class means become

$$\tilde{\mu}_1 = \frac{1}{n_1} \sum_{x^{(i)} \in D_1} w^T x^{(i)} = w^T \mu_1, \qquad \tilde{\mu}_2 = \frac{1}{n_2} \sum_{x^{(i)} \in D_2} w^T x^{(i)} = w^T \mu_2,$$

and the variances of the projected samples within each class are

$$\tilde{\sigma}^2_1 = \frac{1}{n_1} \sum_{y^{(i)} \in \tilde{D}_1} \big(y^{(i)} - \tilde{\mu}_1\big)^2 = \frac{1}{n_1} \sum_{x^{(i)} \in D_1} \big(w^T x^{(i)} - w^T \mu_1\big)^2 = \frac{1}{n_1} \sum_{x^{(i)} \in D_1} w^T \big(x^{(i)} - \mu_1\big)\big(x^{(i)} - \mu_1\big)^T w,$$

$$\tilde{\sigma}^2_2 = \frac{1}{n_2} \sum_{y^{(i)} \in \tilde{D}_2} \big(y^{(i)} - \tilde{\mu}_2\big)^2 = \frac{1}{n_2} \sum_{x^{(i)} \in D_2} w^T \big(x^{(i)} - \mu_2\big)\big(x^{(i)} - \mu_2\big)^T w,$$

where $\tilde{D}_1, \tilde{D}_2$ denote the projected subsets.

Define the within-class scatter matrices of the two classes as

$$S_1 = \frac{1}{n_1} \sum_{x^{(i)} \in D_1} \big(x^{(i)} - \mu_1\big)\big(x^{(i)} - \mu_1\big)^T, \qquad S_2 = \frac{1}{n_2} \sum_{x^{(i)} \in D_2} \big(x^{(i)} - \mu_2\big)\big(x^{(i)} - \mu_2\big)^T,$$

so that

$$\tilde{\sigma}^2_1 = w^T S_1 w, \qquad \tilde{\sigma}^2_2 = w^T S_2 w.$$
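This identity is easy to verify numerically. A minimal sketch, using made-up single-class data and an arbitrary direction $w$:

```python
import numpy as np

rng = np.random.default_rng(1)
X1 = rng.normal(size=(200, 4))   # made-up samples for one class
w = rng.normal(size=4)           # an arbitrary projection direction

mu1 = X1.mean(axis=0)
# Scatter matrix S1 with the 1/n normalization used in the text.
S1 = (X1 - mu1).T @ (X1 - mu1) / len(X1)

# Variance of the projected samples (population variance, i.e. 1/n).
var_proj = np.mean((X1 @ w - w @ mu1) ** 2)

assert np.allclose(var_proj, w @ S1 @ w)   # sigma~_1^2 == w^T S_1 w
```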

Equivalently, define the scatter of the projected samples directly:

$$\tilde{S}_1 = \frac{1}{n_1} \sum_{y^{(i)} \in \tilde{D}_1} \big(y^{(i)} - \tilde{\mu}_1\big)\big(y^{(i)} - \tilde{\mu}_1\big)^T, \qquad \tilde{S}_2 = \frac{1}{n_2} \sum_{y^{(i)} \in \tilde{D}_2} \big(y^{(i)} - \tilde{\mu}_2\big)\big(y^{(i)} - \tilde{\mu}_2\big)^T,$$

which, by the same computation as above, satisfy

$$\tilde{S}_1 = w^T S_1 w, \qquad \tilde{S}_2 = w^T S_2 w.$$

LDA seeks the direction $w$ that maximizes the Fisher criterion, the separation between the projected class means relative to the within-class scatter:

$$J(w) = \frac{\big|\tilde{\mu}_1 - \tilde{\mu}_2\big|^2}{\tilde{S}_1 + \tilde{S}_2}.$$

The numerator expands as

$$\big|\tilde{\mu}_1 - \tilde{\mu}_2\big|^2 = w^T (\mu_1 - \mu_2)(\mu_1 - \mu_2)^T w = w^T S_B w,$$

where the between-class scatter matrix is

$$S_B = (\mu_1 - \mu_2)(\mu_1 - \mu_2)^T.$$

Let $S_w = S_1 + S_2$ denote the total within-class scatter matrix; then

$$J(w) = \frac{w^T S_B w}{w^T S_w w}.$$
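This ratio is straightforward to evaluate for any candidate direction. The helper below is a sketch; the name `fisher_criterion` and the toy data are mine, not from the original post:

```python
import numpy as np

def fisher_criterion(w, X1, X2):
    """Evaluate J(w) = (w^T S_B w) / (w^T S_w w) for two classes."""
    mu1, mu2 = X1.mean(axis=0), X2.mean(axis=0)
    d = (mu1 - mu2).reshape(-1, 1)
    S_B = d @ d.T                                 # between-class scatter
    S1 = (X1 - mu1).T @ (X1 - mu1) / len(X1)
    S2 = (X2 - mu2).T @ (X2 - mu2) / len(X2)
    S_w = S1 + S2                                 # within-class scatter
    return (w @ S_B @ w) / (w @ S_w @ w)

# Example: J(w) is large along the discriminative direction, small elsewhere.
rng = np.random.default_rng(3)
X1 = rng.normal(loc=[0, 0], size=(100, 2))
X2 = rng.normal(loc=[3, 0], size=(100, 2))
print(fisher_criterion(np.array([1.0, 0.0]), X1, X2))  # separating direction
print(fisher_criterion(np.array([0.0, 1.0]), X1, X2))  # uninformative direction
```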

Since $J(w)$ is unchanged when $w$ is rescaled, we can fix the denominator to $w^T S_w w = 1$ and maximize the numerator with a Lagrange multiplier:

$$\mathrm{loss}(w) = w^T S_B w - \lambda\big(w^T S_w w - 1\big).$$

Setting the derivative to zero,

$$\frac{d\,\mathrm{loss}}{dw} = 2 S_B w - 2 \lambda S_w w = 0,$$

which is the generalized eigenvalue problem

$$S_B w = \lambda S_w w.$$

Now observe that $(\mu_1 - \mu_2)^T w$ is a scalar; call it $\lambda_w$. Then

$$S_B w = (\mu_1 - \mu_2)(\mu_1 - \mu_2)^T w = (\mu_1 - \mu_2)\,\lambda_w,$$

so $S_B w$ always points along $\mu_1 - \mu_2$. Multiplying the eigenvalue equation on the left by $S_w^{-1}$,

$$S_w^{-1} S_B w = S_w^{-1} (\mu_1 - \mu_2)\,\lambda_w = \lambda w.$$

Since only the direction of $w$ matters (its scale does not affect $J(w)$), we can drop the scalar factors $\lambda_w$ and $\lambda$ and take

$$w = S_w^{-1} (\mu_1 - \mu_2).$$
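A minimal numerical sketch of this closed form on made-up two-class data (the pseudo-inverse guards against a singular $S_w$, matching the implementation further below):

```python
import numpy as np

rng = np.random.default_rng(2)
X1 = rng.normal(loc=[0, 0], size=(100, 2))   # made-up class-1 samples
X2 = rng.normal(loc=[3, 1], size=(100, 2))   # made-up class-2 samples

mu1, mu2 = X1.mean(axis=0), X2.mean(axis=0)
S1 = (X1 - mu1).T @ (X1 - mu1) / len(X1)
S2 = (X2 - mu2).T @ (X2 - mu2) / len(X2)
Sw = S1 + S2

w = np.linalg.pinv(Sw) @ (mu1 - mu2)         # w = Sw^{-1}(mu1 - mu2)

# The projected class means should be well separated relative to the
# projected within-class standard deviations.
print("projected means:", w @ mu1, w @ mu2)
print("projected stds :", (X1 @ w).std(), (X2 @ w).std())
```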

To classify a new sample $x$, project it onto $w$:

$$y = w^T x,$$

and assign the label whose class-conditional density of the projected value, $p(y \mid \text{label}_i)$, is largest (or, more simply, threshold $y$).

One convention assigns each training sample a class target for the projected value (cf. the least-squares formulation of two-class LDA):

$$\begin{cases} \tilde{y}_{label} = \dfrac{m}{n_1} & \text{if } x \in D_1 \\[6pt] \tilde{y}_{label} = -\dfrac{m}{n_2} & \text{if } x \in D_2 \end{cases}$$

## Algorithm Workflow

- Compute the within-class scatter matrix $S_w$;

- Solve for the projection vector $w$, where $w = S_w^{-1}(\mu_1 - \mu_2)$;

- Project the original samples onto the one-dimensional space spanned by $w$; the projected samples form the reduced data set $D'$ (a single feature).

```python
from sklearn import datasets
import matplotlib.pyplot as plt
%matplotlib inline
import numpy as np
import pandas as pd


def shuffle_data(X, y, seed=None):
    if seed:
        np.random.seed(seed)

    idx = np.arange(X.shape[0])
    np.random.shuffle(idx)

    return X[idx], y[idx]


# Normalize the data set X (scale each sample to unit Lp norm)
def normalize(X, axis=-1, p=2):
    lp_norm = np.atleast_1d(np.linalg.norm(X, p, axis))
    lp_norm[lp_norm == 0] = 1
    return X / np.expand_dims(lp_norm, axis)


# Standardize the data set X (zero mean, unit variance per feature)
def standardize(X):
    X_std = np.zeros(X.shape)
    mean = X.mean(axis=0)
    std = X.std(axis=0)

    # When dividing, always guard against a zero denominator:
    # X_std = (X - X.mean(axis=0)) / X.std(axis=0)
    for col in range(np.shape(X)[1]):
        if std[col]:
            X_std[:, col] = (X[:, col] - mean[col]) / std[col]

    return X_std


# Split the data set into training and test sets
def train_test_split(X, y, test_size=0.2, shuffle=True, seed=None):
    if shuffle:
        X, y = shuffle_data(X, y, seed)

    n_train_samples = int(X.shape[0] * (1 - test_size))
    x_train, x_test = X[:n_train_samples], X[n_train_samples:]
    y_train, y_test = y[:n_train_samples], y[n_train_samples:]

    return x_train, x_test, y_train, y_test


def accuracy(y, y_pred):
    y = y.reshape(y.shape[0], -1)
    y_pred = y_pred.reshape(y_pred.shape[0], -1)
    return np.sum(y == y_pred) / len(y)


# Compute the covariance matrix of the matrix X
def calculate_covariance_matrix(X, Y=np.empty((0, 0))):
    if not Y.any():
        Y = X
    n_samples = np.shape(X)[0]
    covariance_matrix = (1 / (n_samples - 1)) * (X - X.mean(axis=0)).T.dot(Y - Y.mean(axis=0))

    return np.array(covariance_matrix, dtype=float)


class BiClassLDA():
    """
    Linear Discriminant Analysis classifier. Can be used both for
    classification and for dimensionality reduction.
    This implementation handles the two-class case.
    """
    def __init__(self):
        self.w = None

    def transform(self, X, y):
        self.fit(X, y)
        # Project data onto vector w
        X_transform = X.dot(self.w)
        return X_transform

    def fit(self, X, y):
        # Separate data by class
        X = X.reshape(X.shape[0], -1)

        X1 = X[y == 0]
        X2 = X[y == 1]
        y = y.reshape(y.shape[0], -1)

        # Compute the covariance matrices of the two subsets.
        # The 1/(n-1) normalization differs from the 1/n in the text only by
        # a constant factor, which does not affect the direction of w.
        S1 = calculate_covariance_matrix(X1)
        S2 = calculate_covariance_matrix(X2)
        Sw = S1 + S2

        # Compute the means of the two subsets
        mu1 = X1.mean(axis=0)
        mu2 = X2.mean(axis=0)
        mean_diff = np.atleast_1d(mu1 - mu2)
        mean_diff = mean_diff.reshape(X.shape[1], -1)

        # Compute w = Sw^(-1)(mu1 - mu2). The pseudo-inverse of Sw is used
        # here because Sw may be singular.
        self.w = np.linalg.pinv(Sw).dot(mean_diff)

    def predict(self, X):
        y_pred = []
        for sample in X:
            sample = sample.reshape(1, sample.shape[0])
            h = sample.dot(self.w)
            # Threshold the projection at 0: w points toward the class-0 mean,
            # so a negative projection is assigned to class 1.
            y = 1 * (h[0][0] < 0)
            y_pred.append(y)
        return y_pred


def main():
    # Load the data (the iris data set is assumed here, since the labels
    # 0, 1, 2 are filtered below; the original post did not show this line)
    data = datasets.load_iris()
    X = data.data
    y = data.target

    # Keep only labels 0 and 1, since this is a binary classification problem
    X = X[y != 2]
    y = y[y != 2]

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

    # Train the model
    lda = BiClassLDA()
    lda.fit(X_train, y_train)
    lda.transform(X_train, y_train)

    # Predict on the test set
    y_pred = lda.predict(X_test)
    y_pred = np.array(y_pred)
    accu = accuracy(y_test, y_pred)
    print("Accuracy:", accu)


if __name__ == "__main__":
    main()
```
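For comparison, the same experiment can be reproduced with scikit-learn's built-in `LinearDiscriminantAnalysis`; this sketch uses the default `svd` solver:

```python
from sklearn import datasets
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis
from sklearn.model_selection import train_test_split

data = datasets.load_iris()
X, y = data.data[data.target != 2], data.target[data.target != 2]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33)

clf = LinearDiscriminantAnalysis(solver="svd").fit(X_train, y_train)
print("Accuracy:", clf.score(X_test, y_test))
```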
