# 关于Distributional Smoothing with Virtual Adversarial Training的理解

下面简单介绍一下LDS：我们先固定模型的参数$\theta$，假设输入空间为$R^{I}$，输出空间为$Q$，并给定一个训练样本集合：

$D=\left\{\left({x}^{\left(n\right)},{y}^{\left(n\right)}\right)\,|\,{x}^{\left(n\right)}\in {R}^{I},\ {y}^{\left(n\right)}\in Q,\ n=1,\dots ,N\right\}$

$\begin{array}{rl}{\mathrm{\Delta }}_{KL}\left(r,{x}^{\left(n\right)},\theta \right)& =KL\left[p\left(y\phantom{\rule{thinmathspace}{0ex}}|\phantom{\rule{thinmathspace}{0ex}}{x}^{\left(n\right)},\theta \right)||p\left(y\phantom{\rule{thinmathspace}{0ex}}|\phantom{\rule{thinmathspace}{0ex}}{x}^{\left(n\right)}+r,\theta \right)\right]\\ {r}_{v-adv}^{\left(n\right)}& =\mathrm{arg}\underset{r}{max}\left\{{\mathrm{\Delta }}_{KL}\left(r,{x}^{\left(n\right)},\theta \right);||r||\le ϵ\right\}\end{array}$

$LDS\left({x}^{\left(n\right)},\theta \right)=-{\mathrm{\Delta }}_{KL}\left({r}_{v-adv}^{\left(n\right)},{x}^{\left(n\right)},\theta \right)$

$\frac{1}{N}\sum _{n=1}^{N}\mathrm{log}p\left({y}^{\left(n\right)}\phantom{\rule{thinmathspace}{0ex}}|\phantom{\rule{thinmathspace}{0ex}}{x}^{\left(n\right)},\theta \right)+\lambda \frac{1}{N}\sum _{n=1}^{N}LDS\left({x}^{\left(n\right)},\theta \right)$

$LDS\left(x,\theta \right)=-\frac{1}{2{\sigma }^{2}}{ϵ}^{2}||\theta |{|}_{2}^{2}$

$\begin{array}{rl}KL\left[p\left(y\,|\,{x}^{\left(n\right)},\theta \right)\,||\,p\left(y\,|\,{x}^{\left(n\right)}+r,\theta \right)\right]& ={\int }_{-\infty }^{+\infty }p\left(y\,|\,{x}^{\left(n\right)},\theta \right)\log \frac{p\left(y\,|\,{x}^{\left(n\right)},\theta \right)}{p\left(y\,|\,{x}^{\left(n\right)}+r,\theta \right)}dy\\ & ={\int }_{-\infty }^{+\infty }\frac{1}{\sqrt{2\pi }\sigma }{e}^{-\frac{{\left(y-{\theta }^{T}x\right)}^{2}}{2{\sigma }^{2}}}\cdot \frac{-{\left(y-{\theta }^{T}x\right)}^{2}+{\left(y-{\theta }^{T}x-{\theta }^{T}r\right)}^{2}}{2{\sigma }^{2}}dy\\ & ={\int }_{-\infty }^{+\infty }\frac{1}{\sqrt{2\pi }\sigma }{e}^{-\frac{{\left(y-{\theta }^{T}x\right)}^{2}}{2{\sigma }^{2}}}\cdot \left(-\frac{{\theta }^{T}r\left(2y-2{\theta }^{T}x-{\theta }^{T}r\right)}{2{\sigma }^{2}}\right)dy\\ & ={\theta }^{T}r{\int }_{-\infty }^{+\infty }\frac{1}{\sqrt{2\pi }\sigma }\left(-\frac{y}{{\sigma }^{2}}\right){e}^{-\frac{{\left(y-{\theta }^{T}x\right)}^{2}}{2{\sigma }^{2}}}dy+\frac{{\theta }^{T}r\left(2{\theta }^{T}x+{\theta }^{T}r\right)}{2{\sigma }^{2}}\\ & ={\theta }^{T}r{\int }_{-\infty }^{+\infty }\left(-\frac{1}{\sqrt{2\pi }\sigma }\frac{y-{\theta }^{T}x}{{\sigma }^{2}}{e}^{-\frac{{\left(y-{\theta }^{T}x\right)}^{2}}{2{\sigma }^{2}}}-\frac{1}{\sqrt{2\pi }\sigma }\frac{{\theta }^{T}x}{{\sigma }^{2}}{e}^{-\frac{{\left(y-{\theta }^{T}x\right)}^{2}}{2{\sigma }^{2}}}\right)dy+\frac{{\theta }^{T}r\left(2{\theta }^{T}x+{\theta }^{T}r\right)}{2{\sigma }^{2}}\\ & ={\theta }^{T}r\left(-\frac{{\theta }^{T}x}{{\sigma }^{2}}\right)+\frac{{\theta }^{T}r\left(2{\theta }^{T}x+{\theta }^{T}r\right)}{2{\sigma }^{2}}\\ & =\frac{{\left({\theta }^{T}r\right)}^{2}}{2{\sigma }^{2}}\le \frac{||\theta |{|}_{2}^{2}{\epsilon }^{2}}{2{\sigma }^{2}}\end{array}$

其中最后的不等号由 Cauchy–Schwarz 不等式和约束 $||r|{|}_{2}\le \epsilon$ 得到，当 $r=\epsilon \theta /||\theta |{|}_{2}$ 时取等号，此时的 $r$ 即为 ${r}_{v-adv}$。

$LDS\left(x,\theta \right)\simeq -\frac{1}{2}\sigma \left({\theta }^{T}x\right)\left(1-\sigma \left({\theta }^{T}x\right)\right){ϵ}^{2}||\theta |{|}_{2}^{2}$

$\begin{array}{rl}KL\left[p\left(y\,|\,{x}^{\left(n\right)},\theta \right)\,||\,p\left(y\,|\,{x}^{\left(n\right)}+r,\theta \right)\right]& =\frac{1}{1+{e}^{-{\theta }^{T}x}}\log \frac{1+{e}^{-{\theta }^{T}x-{\theta }^{T}r}}{1+{e}^{-{\theta }^{T}x}}+\frac{1}{1+{e}^{{\theta }^{T}x}}\log \frac{1+{e}^{{\theta }^{T}x+{\theta }^{T}r}}{1+{e}^{{\theta }^{T}x}}\\ & \simeq -\frac{{e}^{-{\theta }^{T}x}}{{\left(1+{e}^{-{\theta }^{T}x}\right)}^{2}}{\theta }^{T}r+\frac{1}{2}\frac{{e}^{-{\theta }^{T}x}}{{\left(1+{e}^{-{\theta }^{T}x}\right)}^{3}}{\left({\theta }^{T}r\right)}^{2}+\frac{{e}^{{\theta }^{T}x}}{{\left(1+{e}^{{\theta }^{T}x}\right)}^{2}}{\theta }^{T}r+\frac{1}{2}\frac{{e}^{{\theta }^{T}x}}{{\left(1+{e}^{{\theta }^{T}x}\right)}^{3}}{\left({\theta }^{T}r\right)}^{2}\\ & =\frac{1}{2}\frac{{e}^{-{\theta }^{T}x}}{{\left(1+{e}^{-{\theta }^{T}x}\right)}^{2}}{\left({\theta }^{T}r\right)}^{2}\\ & =\frac{1}{2}\sigma \left({\theta }^{T}x\right)\left(1-\sigma \left({\theta }^{T}x\right)\right){\left({\theta }^{T}r\right)}^{2}\le \frac{1}{2}\sigma \left({\theta }^{T}x\right)\left(1-\sigma \left({\theta }^{T}x\right)\right){\epsilon }^{2}||\theta |{|}_{2}^{2}\end{array}$

其中第二步是对 ${\theta }^{T}r$ 做二阶 Taylor 展开（一阶项相互抵消）；最后的不等号在 $r=\epsilon \theta /||\theta |{|}_{2}$ 时取等号，取负号即得到上面的 $LDS\left(x,\theta \right)$。

${\mathrm{\Delta }}_{KL}\left(r,x,\theta \right)\simeq \frac{1}{2}{r}^{T}H\left(x,\theta \right)r$

$\begin{array}{rl}{r}_{v-adv}\left(x,\theta \right)& \simeq \mathrm{arg}\underset{r}{max}\left\{{r}^{T}H\left(x,\theta \right)r;||r|{|}_{2}\le ϵ\right\}\\ & =ϵ\overline{u\left(x,\theta \right)}\end{array}$

$d←\overline{Hd}$

$\begin{array}{rl}Hd& \simeq \frac{{\mathrm{\nabla }}_{r}{\mathrm{\Delta }}_{KL}\left(r,x,\theta \right){|}_{r=\xi d}-{\mathrm{\nabla }}_{r}{\mathrm{\Delta }}_{KL}\left(r,x,\theta \right){|}_{r=0}}{\xi }\\ & =\frac{{\mathrm{\nabla }}_{r}{\mathrm{\Delta }}_{KL}\left(r,x,\theta \right){|}_{r=\xi d}}{\xi }\end{array}$

$d\leftarrow \overline{{\mathrm{\nabla }}_{r}{\mathrm{\Delta }}_{KL}\left(r,x,\theta \right){|}_{r=\xi d}}$

• 广告
• 抄袭
• 版权
• 政治
• 色情
• 无意义
• 其他

120