# 关于softmax损失函数的推导

softmax classifier的损失函数如下:
$loss(X,Y) = -\frac{1}{N}\sum_{i}\sum_{j}1\{j=y^{(i)}\}\log(p_{i,j})$

对任意固定的类别 $j$,把对类别的求和拆成第 $j$ 项与其余各项:
$\sum_{i}\sum_{k}1\{k=y^{(i)}\}\log(p_{i,k}) = \sum_{i} [1\{j=y^{(i)}\}\log(p_{i,j})+\sum_{c\neq j}1\{c=y^{(i)}\}\log(p_{i,c}) ]$

$loss(X,Y) = -\frac{1}{N}[\sum_{i} [1\{j=y^{(i)}\}\log(p_{i,j})+\sum_{c\neq j}1\{c=y^{(i)}\}\log(p_{i,c})]]$

$\frac{\partial loss}{\partial x_{i,j}} = -\frac{1}{N}[1\{j=y^{(i)}\}\frac{1}{p_{i,j}}\frac{\partial p_{i,j}}{\partial x_{i,j}}+\sum_{c\neq j}1\{c=y^{(i)}\}\frac{1}{p_{i,c}}\frac{\partial p_{i,c}}{\partial x_{i,j}}]$

$p_{i,j} = \frac{\exp(x_{i,j})}{\sum_k \exp(x_{i,k})}$
$\frac{\partial p_{i,j}}{\partial x_{i,j}} = \frac{\exp(x_{i,j})\sum_{k}\exp(x_{i,k}) - \exp(x_{i,j}) \exp(x_{i,j})}{[\sum_{k}\exp(x_{i,k})]^2} = \frac{\exp(x_{i,j})}{\sum_{k}\exp(x_{i,k})} \cdot \frac{ \sum_{k}\exp(x_{i,k}) -\exp(x_{i,j}) }{ \sum_{k} \exp(x_{i,k}) } = p_{i,j}(1-p_{i,j})$

$p_{i,c} = \frac{\exp(x_{i,c})}{\sum_k \exp(x_{i,k})}$($c\neq j$),
$\frac{\partial p_{i,c}}{\partial x_{i,j}} = \frac{- \exp(x_{i,c}) \exp(x_{i,j})}{[\sum_{k}\exp(x_{i,k})]^2} = \frac{\exp(x_{i,c})}{\sum_{k}\exp(x_{i,k})} \cdot \frac{-\exp(x_{i,j}) }{ \sum_{k} \exp(x_{i,k}) } = p_{i,c}(-p_{i,j})$

$\frac{\partial loss}{\partial x_{i,j}} = -\frac{1}{N}[1\{j=y^{(i)}\}\frac{1}{p_{i,j}}\frac{\partial p_{i,j}}{\partial x_{i,j}}+\sum_{c\neq j}1\{c=y^{(i)}\}\frac{1}{p_{i,c}}\frac{\partial p_{i,c}}{\partial x_{i,j}} ]$
$=-\frac{1}{N}[ 1\{j=y^{(i)}\}\frac{1}{p_{i,j}} p_{i,j}(1-p_{i,j}) + \sum_{c\neq j}1\{c=y^{(i)}\}\frac{1}{p_{i,c}} p_{i,c}(-p_{i,j}) ]$
$=-\frac{1}{N}[ 1\{j=y^{(i)}\}(1-p_{i,j}) + \sum_{c\neq j}1\{c=y^{(i)}\}(-p_{i,j}) ]$
$=-\frac{1}{N}[ 1\{j=y^{(i)}\}- 1\{j=y^{(i)}\} p_{i,j} - \sum_{c\neq j}1\{c=y^{(i)}\}p_{i,j} ]$
$=-\frac{1}{N}[ 1\{j=y^{(i)}\} - p_{i,j}\sum_{c}1\{c=y^{(i)}\} ]$(每个样本恰有一个真实类别,故 $\sum_{c}1\{c=y^{(i)}\}=1$)
$=-\frac{1}{N}[ 1\{j=y^{(i)}\} - p_{i,j} ]$
$=\frac{1}{N}[ p_{i,j} - 1\{j=y^{(i)}\} ]$

©️2019 CSDN 皮肤主题: 大白 设计师: CSDN官方博客