# 深度玻尔兹曼机的推导过程

$E(v, h^{(1)}, h^{(2)}) = -v^{T} W^{(1)} h^{(1)} - {h^{(1)}}^{T} W^{(2)} h^{(2)}$

$p\left(v,{h}^{\left(1\right)},{h}^{\left(2\right)}\right)=\frac{1}{Z}\mathrm{exp}\left(-E\left(v,{h}^{\left(1\right)},{h}^{\left(2\right)}\right)\right)$

$\begin{array}{c} P(v_i = 1 \mid h^{(1)}) = \sigma\left(W^{(1)}_{i,:} h^{(1)}\right) \\ P(h^{(1)}_i = 1 \mid v, h^{(2)}) = \sigma\left(v^{T} W^{(1)}_{:,i} + W^{(2)}_{i,:} h^{(2)}\right) \\ P(h^{(2)}_k = 1 \mid h^{(1)}) = \sigma\left({h^{(1)}}^{T} W^{(2)}_{:,k}\right) \end{array}$

$\begin{array}{rl} \mathcal{L} & = \frac{1}{m} \sum_{i=1}^{m} \log p(\hat{v}^{(i)}) \\ & = \frac{1}{m} \sum_{i=1}^{m} \log \sum_{h} p(\hat{v}^{(i)}, h) \\ & = \frac{1}{m} \sum_{i=1}^{m} \log \frac{\sum_{h} \exp(-E(\hat{v}^{(i)}, h))}{\sum_{v,h} \exp(-E(v, h))} \end{array}$

$\begin{array}{rl} \frac{\partial \log p(\hat{v}^{(i)})}{\partial \omega_{ij}} & = \frac{\partial \log \sum_{h} p(\hat{v}^{(i)}, h)}{\partial \omega_{ij}} \\ & = \frac{\partial \left( \log \sum_{h} \exp(-E(\hat{v}^{(i)}, h)) - \log \sum_{v,h} \exp(-E(v, h)) \right)}{\partial \omega_{ij}} \\ & = \sum_{h} \frac{\exp(-E(\hat{v}^{(i)}, h))}{\sum_{h} \exp(-E(\hat{v}^{(i)}, h))} v_i h^{(1)}_j - \sum_{v,h} \frac{\exp(-E(v, h))}{\sum_{v,h} \exp(-E(v, h))} v_i h^{(1)}_j \\ & = \sum_{h} p(h \mid \hat{v}^{(i)}) v_i h^{(1)}_j - \sum_{v,h} p(v, h) v_i h^{(1)}_j \end{array}$

$\frac{\partial \log p(\hat{v}^{(i)})}{\partial W^{(1)}} = \mathbb{E}_{p(h \mid \hat{v}^{(i)})}\left[v {h^{(1)}}^{T}\right] - \mathbb{E}_{p(v,h)}\left[v {h^{(1)}}^{T}\right]$

$\frac{\partial \mathcal{L}}{\partial W^{(1)}} = \frac{1}{m} \sum_{i=1}^{m} \mathbb{E}_{p(h \mid \hat{v}^{(i)})}\left[v {h^{(1)}}^{T}\right] - \mathbb{E}_{p(v,h)}\left[v {h^{(1)}}^{T}\right]$

$Q\left({h}^{\left(1\right)},{h}^{\left(2\right)}|v\right)=\prod _{j}Q\left({h}_{j}^{\left(1\right)}|v\right)\prod _{k}Q\left({h}_{k}^{\left(2\right)}|v\right)$

$\begin{array}{rl}L& =\mathrm{ln}P\left(v\right)-D\left(Q\left(h|v\right)||P\left(h|v\right)\right)\\ & =\mathbb{E}\left[\mathrm{ln}P\left(v\right)+\mathrm{ln}\frac{P\left(h|v\right)}{Q\left(h|v\right)}\right]\\ & =\mathbb{E}\left[\mathrm{ln}\frac{P\left(h|v\right)P\left(v\right)}{Q\left(h|v\right)}\right]\end{array}$

$\begin{array}{rl} L & = \mathbb{E}\left[\ln \frac{P(h \mid v) P(v)}{Q(h \mid v)}\right] \\ & = \mathbb{E}\left[\ln \frac{P(v, h)}{Q(h \mid v)}\right] \\ & = \mathbb{E}\left[\ln P(v, h)\right] - \mathbb{E}\left[\ln Q(h \mid v)\right] \\ & = \sum_{h} Q(h \mid v) \ln P(v, h) + H(Q) \end{array}$

$Q(h^{(1)}, h^{(2)} \mid v) = \prod_{j} \left(\hat{h}^{(1)}_j\right)^{h^{(1)}_j} \left(1 - \hat{h}^{(1)}_j\right)^{1 - h^{(1)}_j} \times \prod_{k} \left(\hat{h}^{(2)}_k\right)^{h^{(2)}_k} \left(1 - \hat{h}^{(2)}_k\right)^{1 - h^{(2)}_k}$

$\begin{array}{c} \frac{\partial L}{\partial \hat{h}^{(1)}_j} = 0 \quad \frac{\partial L}{\partial \hat{h}^{(2)}_k} = 0 \end{array}$

$\begin{array}{c} \hat{h}^{(1)}_j = \sigma\left(\sum_{i} v_i W^{(1)}_{i,j} + \sum_{k'} W^{(2)}_{j,k'} \hat{h}^{(2)}_{k'}\right) \\ \hat{h}^{(2)}_k = \sigma\left(\sum_{j'} W^{(2)}_{j',k} \hat{h}^{(1)}_{j'}\right) \end{array}$

$\begin{array}{rl}L& =\mathbb{E}\left[\mathrm{ln}P\left(v,h\right)\right]+H\left(Q\right)\\ & =-\mathbb{E}\left[E\left(v,h\right)\right]-\mathrm{ln}Z+H\left(Q\right)\end{array}$

$\mathbb{E}\left[h^{(1)}\right] = \hat{h}^{(1)} \quad \mathbb{E}\left[h^{(2)}\right] = \hat{h}^{(2)}$

$h^{(1)}$ 和 $h^{(2)}$ 在 $Q$ 分布下是独立的，所以
$\mathbb{E}\left[h^{(1)} {h^{(2)}}^{T}\right] = \mathbb{E}\left[h^{(1)}\right] \mathbb{E}\left[h^{(2)}\right]^{T} = \hat{h}^{(1)} {\hat{h}^{(2)}}^{T}$

$\begin{array}{rl} \mathbb{E}\left[{h^{(1)}}^{T} W^{(2)} h^{(2)}\right] & = \mathbb{E}\left[\mathrm{tr}\left({h^{(1)}}^{T} W^{(2)} h^{(2)}\right)\right] \\ & = \mathbb{E}\left[\mathrm{tr}\left(W^{(2)} h^{(2)} {h^{(1)}}^{T}\right)\right] \\ & = \mathrm{tr}\left(W^{(2)} \mathbb{E}\left[h^{(2)} {h^{(1)}}^{T}\right]\right) \\ & = \mathrm{tr}\left(W^{(2)} \hat{h}^{(2)} {\hat{h}^{(1)}}^{T}\right) \\ & = {\hat{h}^{(1)}}^{T} W^{(2)} \hat{h}^{(2)} \end{array}$

$L = v^{T} W^{(1)} \hat{h}^{(1)} + {\hat{h}^{(1)}}^{T} W^{(2)} \hat{h}^{(2)} - \ln Z + H(Q)$

$\mathcal{L}\mathcal{L} = \frac{1}{m} \sum_{i=1}^{m} L(\hat{v}^{(i)})$

$\begin{array}{c} \frac{\partial \mathcal{L}\mathcal{L}}{\partial W^{(1)}} = \frac{1}{m} \sum_{i=1}^{m} \hat{v}^{(i)} {\hat{h}^{(1)}}^{T} - \frac{\partial \ln Z}{\partial W^{(1)}} \\ \frac{\partial \mathcal{L}\mathcal{L}}{\partial W^{(2)}} = \frac{1}{m} \sum_{i=1}^{m} \hat{h}^{(1)} {\hat{h}^{(2)}}^{T} - \frac{\partial \ln Z}{\partial W^{(2)}} \end{array}$

$\begin{array}{rl} \frac{\partial \ln Z}{\partial W^{(1)}} & = \frac{\partial \ln \sum_{v} \sum_{h} \exp(-E(v, h))}{\partial W^{(1)}} \\ & = \sum_{v} \sum_{h} \frac{\exp(-E(v, h))}{\sum_{v} \sum_{h} \exp(-E(v, h))} \times \frac{\partial \left(-E(v, h)\right)}{\partial W^{(1)}} \\ & = \mathbb{E}_{P(v,h)}\left[v {h^{(1)}}^{T}\right] \end{array}$

$\sum _{y}p\left(y\right)f\left(y\right)\approx \frac{1}{m}\sum _{j=1}^{m}f\left({y}^{j}\right)$