# 理论部分

## 高斯混合模型(GMM)

$$p(\mathbf{x})=\sum_{k=1}^K \phi_k\,\mathcal{N}(\mathbf{x}\mid\boldsymbol{\mu}_k,\boldsymbol{\Sigma}_k).$$

$$\mathcal{N}(\mathbf{x}\mid\boldsymbol{\mu},\boldsymbol{\Sigma})=\frac{1}{(2\pi)^{D/2}}\frac{1}{|\boldsymbol{\Sigma}|^{1/2}}\exp\left\{-\frac{1}{2}(\mathbf{x}-\boldsymbol{\mu})^T\boldsymbol{\Sigma}^{-1}(\mathbf{x}-\boldsymbol{\mu})\right\}$$
为 $D$ 维高斯分布的密度函数，其中 $\boldsymbol{\mu}$ 为其均值向量，$\boldsymbol{\Sigma}$ 为其协方差矩阵。

$$\max_{\theta}\ \ln p(\mathbf{X}\mid\theta)=\sum_{i=1}^n\ln\sum_{k=1}^K\phi_k\,\mathcal{N}(\mathbf{x}_i\mid\boldsymbol{\mu}_k,\boldsymbol{\Sigma}_k).$$

## EM算法

$$\begin{aligned}
\ln p(\mathbf{X}\mid\bar{\theta})&=\sum_{\mathbf{Z}}p(\mathbf{Z}\mid\mathbf{X},\bar{\theta})\ln p(\mathbf{X}\mid\bar{\theta})\\
&=\sum_{\mathbf{Z}}p(\mathbf{Z}\mid\mathbf{X},\bar{\theta})\ln\frac{p(\mathbf{X},\mathbf{Z}\mid\bar{\theta})}{p(\mathbf{Z}\mid\mathbf{X},\bar{\theta})}\\
&\leq\max_\theta \sum_{\mathbf{Z}}p(\mathbf{Z}\mid\mathbf{X},\bar{\theta})\ln\frac{p(\mathbf{X},\mathbf{Z}\mid\theta)}{p(\mathbf{Z}\mid\mathbf{X},\bar{\theta})} &(1)\\
&\leq \ln \sum_{\mathbf{Z}}\frac{p(\mathbf{X},\mathbf{Z}\mid\theta_{\max})}{p(\mathbf{Z}\mid\mathbf{X},\bar{\theta})}\,p(\mathbf{Z}\mid\mathbf{X},\bar{\theta}) &(\text{Jensen不等式})\\
&=\ln p(\mathbf{X}\mid\theta_{\max})
\end{aligned}$$

$$\begin{aligned}
\theta_{\max}&=\mathop{\arg\max}_\theta \sum_{\mathbf{Z}}p(\mathbf{Z}\mid\mathbf{X},\bar{\theta})\ln\frac{p(\mathbf{X},\mathbf{Z}\mid\theta)}{p(\mathbf{Z}\mid\mathbf{X},\bar{\theta})}\\
&=\mathop{\arg\max}_\theta\sum_{\mathbf{Z}}p(\mathbf{Z}\mid\mathbf{X},\bar{\theta})\ln p(\mathbf{X},\mathbf{Z}\mid\theta). &(2)
\end{aligned}$$

### Proposition 1.

$$\sum_{\mathbf{Z}}p(\mathbf{Z}\mid\mathbf{X},\bar{\theta})\ln p(\mathbf{X},\mathbf{Z}\mid\theta)=\sum_{i=1}^n\sum_{z_i}p(z_i\mid\mathbf{x}_i,\bar{\theta})\ln p(\mathbf{x}_i,z_i\mid\theta). \quad (3)$$

**Pf.**
$$\begin{aligned}
\sum_{\mathbf{Z}}p(\mathbf{Z}\mid\mathbf{X},\bar{\theta})\ln p(\mathbf{X},\mathbf{Z}\mid\theta)&=\sum_{\mathbf{Z}}\prod_{j=1}^n p(z_j\mid\mathbf{x}_j,\bar{\theta})\ln \prod_{i=1}^n p(\mathbf{x}_i,z_i\mid\theta)\\
&=\sum_{z_1}\sum_{z_2}\cdots\sum_{z_n}\prod_{j=1}^n p(z_j\mid\mathbf{x}_j,\bar{\theta})\sum_{i=1}^n\ln p(\mathbf{x}_i,z_i\mid\theta)\\
&=\sum_{i=1}^n\sum_{z_i}p(z_i\mid\mathbf{x}_i,\bar{\theta})\ln p(\mathbf{x}_i,z_i\mid\theta)\sum_{z_1}\sum_{z_2}\cdots\sum_{z_t,\,t\neq i}\cdots\sum_{z_n}\prod_{j=1,\,j\neq i}^n p(z_j\mid\mathbf{x}_j,\bar{\theta})\\
&=\sum_{i=1}^n\sum_{z_i}p(z_i\mid\mathbf{x}_i,\bar{\theta})\ln p(\mathbf{x}_i,z_i\mid\theta)\prod_{j=1,\,j\neq i}^n\sum_{z_j}p(z_j\mid\mathbf{x}_j,\bar{\theta}) &(\text{多项式乘积展开式的逆})\\
&=\sum_{i=1}^n\sum_{z_i}p(z_i\mid\mathbf{x}_i,\bar{\theta})\ln p(\mathbf{x}_i,z_i\mid\theta). &(\text{概率归一性})\quad\square
\end{aligned}$$

## EM算法求解高斯混合模型

$$\begin{aligned}
\max_{\theta}\sum_{i=1}^n\sum_{z_i}p(z_i\mid\mathbf{x}_i,\bar{\theta})\ln p(\mathbf{x}_i,z_i\mid\theta)&=\sum_{i=1}^n\sum_{z_i=1}^K p(z_i\mid\mathbf{x}_i,\bar{\theta})\ln \phi_{z_i}\mathcal{N}(\mathbf{x}_i\mid\boldsymbol{\mu}_{z_i},\boldsymbol{\Sigma}_{z_i})\\
&=\sum_{i=1}^n\sum_{k=1}^K p(k\mid\mathbf{x}_i,\bar{\theta})\ln \phi_{k}\mathcal{N}(\mathbf{x}_i\mid\boldsymbol{\mu}_{k},\boldsymbol{\Sigma}_{k})
\end{aligned}$$

$$\max_{\phi_k,\boldsymbol{\mu}_k,\boldsymbol{\Sigma}_k} f(\phi_k,\boldsymbol{\mu}_k,\boldsymbol{\Sigma}_k)=\sum_{i=1}^n\sum_{k=1}^K\gamma_{ik}\left(\ln\phi_k-\frac{1}{2}\ln|\boldsymbol{\Sigma}_k|-\frac{1}{2}(\mathbf{x}_i-\boldsymbol{\mu}_k)^T\boldsymbol{\Sigma}_k^{-1}(\mathbf{x}_i-\boldsymbol{\mu}_k)\right) \quad (4)$$
$$\begin{aligned}
\text{s.t. } &\ \phi_k>0 &(k=1,\dots,K)\\
&\sum_{k=1}^K\phi_k=1\\
&\boldsymbol{\Sigma}_k\succ 0 &(k=1,\dots,K)
\end{aligned}$$

$$\frac{\partial f}{\partial\boldsymbol{\mu}_k}=-\sum_{i=1}^n\gamma_{ik}\boldsymbol{\Sigma}_k^{-1}(\mathbf{x}_i-\boldsymbol{\mu}_k)=0,$$
记 $n_k=\sum_{i=1}^n\gamma_{ik}$，利用 $\boldsymbol{\Sigma}_k$ 满秩（可逆）约束解得
$$\boldsymbol{\mu}_k^*=\frac{1}{n_k}\sum_{i=1}^n\gamma_{ik}\mathbf{x}_i.$$

$$\frac{\partial f}{\partial\boldsymbol{\Sigma}_k^{-1}}=\sum_{i=1}^n\gamma_{ik}\left(\frac{\operatorname{adj}(\boldsymbol{\Sigma}_k^{-1})^T}{|\boldsymbol{\Sigma}_k^{-1}|}-(\mathbf{x}_i-\boldsymbol{\mu}_k)(\mathbf{x}_i-\boldsymbol{\mu}_k)^T\right)=0,$$

$$\boldsymbol{\Sigma}_k^*=\frac{1}{n_k}\sum_{i=1}^n\gamma_{ik}(\mathbf{x}_i-\boldsymbol{\mu}_k)(\mathbf{x}_i-\boldsymbol{\mu}_k)^T.$$

$$L(\phi_k,\lambda)=f(\phi_k,\boldsymbol{\mu}_k,\boldsymbol{\Sigma}_k)+\lambda\left(\sum_{k=1}^K\phi_k-1\right),$$

$$\left\{\begin{array}{ll}
\dfrac{\partial}{\partial\phi_k}L(\phi_k,\lambda)=\dfrac{1}{\phi_k}\displaystyle\sum_{i=1}^n\gamma_{ik}+\lambda=0, & k=1,\dots,K\\[2ex]
\displaystyle\sum_{k=1}^K\phi_k=1.
\end{array}\right.$$

$$\phi_k^*=\frac{n_k}{n},$$

$$p(k\mid\mathbf{x}_i,\bar{\theta})=\frac{\bar{\phi}_{k}\,\mathcal{N}(\mathbf{x}_i\mid\bar{\boldsymbol{\mu}}_{k},\bar{\boldsymbol{\Sigma}}_{k})}{\sum\limits_{j=1}^K\bar{\phi}_j\,\mathcal{N}(\mathbf{x}_i\mid\bar{\boldsymbol{\mu}}_j,\bar{\boldsymbol{\Sigma}}_j)}.$$

# 实验部分

## 生成数据

% --- GMM sample-generation script ---
% Builds K random D-dimensional Gaussians, draws N labeled samples from
% them, saves the samples to Dat.mat, and scatter-plots them in 3-D.
% NOTE(review): the plotting section reads columns 1-3 only, so it assumes
% dim == 3 -- confirm before changing dim.

% Parameter section -- adjust freely
K = 3;      %number of Gaussian components
dim = 3;    %data dimension
N = 1000;    %number of samples to generate
step = 4;   %distance between the means of consecutive components

% Variable section
Phi = cell(K,1);         %mixing coefficients (cell array)
Mu=cell(K,1);           %mean vectors (cell array)
Sigma = cell(K,1);      %covariance matrices (cell array)
Dat = zeros(N,dim+1);   %data matrix; the last column is the component label

for i =1:K
Phi(i)={1/K};    %by default every component gets the same weight; adjust as needed
end
% Means: start at the all-ones vector and take a random step of length
% `step` from the previous mean for each subsequent component.
Mu(1) = {ones(dim,1)};
for i =2:K
t = rand(dim,1);
t = t/norm(t,2)*step;
Mu(i)={Mu{i-1}+t};
end
% Covariances: t is an orthonormal basis (orth of a random matrix), tt the
% eigenvalues, so t'*diag(tt)*t is symmetric positive semidefinite.
for i =1:K
t = rand(dim);
t = orth(t);
tt = rand(dim,1);
Sigma(i) = {t'*diag(tt)*t};
end

% Draw samples per component in proportion to its weight; this matches
% sampling from the mixture distribution in expectation.
cnt = 0;
for i = 1:K
if i == K
n = N-cnt;   %last component absorbs rounding leftovers so totals hit N
else
n = round(N*Phi{i});
end
t = mvnrnd(Mu{i}, Sigma{i}, n);
Dat(cnt+1:cnt+n,:) = [t ones(n, 1)*i];   %append the component label
cnt = cnt+n;
end

% Persist the labeled data for the EM script
save('Dat.mat', 'Dat');

% Plotting: one color per generating component
close all;
view(3);
hold on;
grid on;
mycolor = colorcube(20);
cnt = 0;
for i = 1:K
if i == K
n = N-cnt;
else
n = round(N*Phi{i});
end
t =  Dat(cnt+1:cnt+n,:);
plot3(t(:,1), t(:,2), t(:,3), '.', 'Color', mycolor(i,:));
cnt = cnt+n;
end
hold off;


## EM算法求解GMM

function []=main()
%MAIN  Fit a Gaussian mixture model to the samples in Dat.mat via EM.
%
%   Loads the N x (dim+1) matrix Dat produced by the generation script
%   (last column = generating component label, used only to recover K),
%   initializes the parameters either randomly or with kmeans, then
%   alternates E and M steps until the relative change of the
%   log-likelihood falls below Eps, printing progress each iteration.
close all;
load('Dat.mat', 'Dat');  %BUGFIX: main has its own workspace, so Dat must be loaded explicitly -- it was previously undefined here
Eps = 1e-4;  %stop when the relative log-likelihood change is below Eps
flag = false; %true -> initialize the parameters with kmeans

N = size(Dat, 1);   %number of samples
dim = size(Dat, 2)-1;   %data dimension
K = max(Dat(:,dim+1));  %number of Gaussian components (from the label column)

%Parameter initialization
if flag
    %kmeans-based initialization
    [PhiBar, MuBar, SigmaBar] = initByKmeans(Dat(:,1:dim), K);
else
    %random initialization
    PhiBar = cell(K,1);
    MuBar = cell(K,1);
    SigmaBar = cell(K,1);
    rPhi = rand(K,1);       %BUGFIX: was rand(3,1), which breaks whenever K ~= 3
    rPhi = rPhi/sum(rPhi);  %normalize so the mixing coefficients sum to 1
    for k=1:K
        PhiBar(k) = {rPhi(k)};
        MuBar(k) = {mean(Dat(:,1:dim))'.*(1+rand(dim, 1)*1)}; %random perturbation of the global mean
        SigmaBar(k) = {eye(dim)};
    end
end
logLikeBar = calLogLike(Dat(:,1:dim), K, PhiBar, MuBar, SigmaBar); %initial log-likelihood
Phi = cell(K,1);
Mu = cell(K,1);
Sigma = cell(K,1);

%EM iterations
cnt = 0;
while true
    %E step: responsibilities Gamma(i,k) = p(k | x_i, thetaBar), eq. (3)
    Gamma = zeros(N, K);
    for i = 1:N
        Xi = Dat(i,1:dim)';
        Pxi = 0;
        for k = 1:K
            Pxi = Pxi+PhiBar{k}*mvnpdf(Xi, MuBar{k}, SigmaBar{k});
        end
        Pxi = max(Pxi, realmin); %guard: avoid 0/0 when all densities underflow
        for k = 1:K
            Gamma(i,k) = PhiBar{k}*mvnpdf(Xi, MuBar{k}, SigmaBar{k})/Pxi;
        end
    end

    %M step: closed-form maximizers phi*, mu*, Sigma* (eq. (4) solutions)
    N_k = ones(1,N)*Gamma;  %effective sample count n_k per component
    for k=1:K
        Phi(k) = {N_k(k)/N};
        mu = zeros(dim, 1);
        for i = 1:N
            mu = mu+Gamma(i,k)*Dat(i,1:dim)';
        end
        Mu(k) = {mu/N_k(k)};
        sigma = zeros(dim);
        for i = 1:N
            sigma = sigma+Gamma(i,k)*(Dat(i,1:dim)'-Mu{k})*(Dat(i,1:dim)-Mu{k}');
        end
        Sigma(k) = {sigma/N_k(k)};
    end

    cnt = cnt+1;
    logLike = calLogLike(Dat(:,1:dim), K, Phi, Mu, Sigma);
    relGap = abs(logLike-logLikeBar)/abs(logLikeBar);
    disp(['Iteration: ', num2str(cnt), ' RelativeGap: ', sprintf('%.6f LogLikelihood %.6f', relGap, logLike)]);
    logLikeBar = logLike;

    %Plotting (disabled): color each sample by its MAP component
    %         figure(cnt);
    %         view(3);
    %         hold on;
    %         grid on;
    %         mycolor = colorcube(20);
    %         for i = 1:N
    %             postP = 0;
    %             choose = 0;
    %             for k = 1:K %maximum-a-posteriori assignment
    %                 temp = Phi{k}*mvnpdf(Dat(i,1:dim)', Mu{k}, Sigma{k});
    %                 if temp > postP
    %                     postP = temp;
    %                     choose = k;
    %                 end
    %             end
    %             plot3(Dat(i,1), Dat(i,2), Dat(i,3),'.', 'Color', mycolor(choose,:), 'markersize', 4);
    %         end
    %         for k =1:K
    %             plot3(Mu{k}(1), Mu{k}(2), Mu{k}(3), 'k.', 'markersize', 15);
    %         end
    %         saveas(gcf,sprintf('%d.jpg', cnt));

    if relGap < Eps
        break;
    end
    PhiBar = Phi;
    MuBar = Mu;
    SigmaBar = Sigma;
end
end

function [Phi, Mu, Sigma] = initByKmeans(X, K)
%INITBYKMEANS  Initialize GMM parameters from a kmeans clustering of X.
%
%   X : N x dim data matrix.
%   K : number of components.
%   Returns cell arrays Phi (mixing weights), Mu (dim x 1 means),
%   Sigma (dim x dim covariances), one entry per component.
N = size(X,1);
dim = size(X,2);
Phi = cell(K,1);
Mu = cell(K,1);
Sigma = cell(K,1);
for k=1:K
    Phi(k) = {0};            %used first as a per-cluster sample count
    Mu(k) = {zeros(dim, 1)};
    Sigma(k) = {zeros(dim)};
end
[idx, C] = kmeans(X, K);     %idx: cluster of each sample, C: centroids
for k = 1:K
    Mu{k} = C(k,:)';
end
%Accumulate per-cluster counts and scatter matrices
for i = 1:N
    k = idx(i);
    Phi{k} = Phi{k}+1;
    Sigma{k} = Sigma{k}+(X(i,:)'-Mu{k})*(X(i,:)-Mu{k}');
end

for k=1:K
    %BUGFIX: regularize with 1e-6*eye(dim) instead of 1e-6*ones(dim);
    %the all-ones matrix is rank one and does not guarantee positive
    %definiteness, while a scaled identity does.
    %max(.,1) guards against division by zero for an empty cluster.
    Sigma{k} = Sigma{k}/max(Phi{k},1) + 1e-6*eye(dim);
    Phi{k} = Phi{k}/N;       %turn the count into a mixing proportion
end
end

function [logLike] = calLogLike(X, K, Phi, Mu, Sigma)
%CALLOGLIKE  Log-likelihood of data X under a K-component Gaussian mixture.
%
%   X     : N x dim data matrix (one sample per row).
%   Phi   : cell array of mixing coefficients.
%   Mu    : cell array of dim x 1 mean vectors.
%   Sigma : cell array of dim x dim covariance matrices.
%   Returns the sum over samples of log sum_k Phi{k}*N(x_i | Mu{k}, Sigma{k}).
numSamples = size(X, 1);
perSample = zeros(numSamples, 1);   %log mixture density of each sample
for idx = 1:numSamples
    xi = X(idx,:)';
    mixDensity = 0;
    for comp = 1:K
        mixDensity = mixDensity + Phi{comp}*mvnpdf(xi, Mu{comp}, Sigma{comp});
    end
    perSample(idx) = log(mixDensity);
end
logLike = sum(perSample);
end


# 参考文献

Christopher M. Bishop. Pattern Recognition and Machine Learning. pp. 430-459.

Andrew Ng. Machine Learning Lecture Notes.

10-28 1万+

11-29
01-29 5223
04-10 1万+
04-16 7326
06-18 303
©️2020 CSDN 皮肤主题: 大白 设计师:CSDN官方博客