1. Introduction to Loss Functions
A loss function, also called an objective function, measures the discrepancy between the ground-truth values and the model's predictions.
In PyTorch, the base class for losses is _Loss, which in turn inherits from Module, so each loss only needs to implement forward(). Every training step calls
loss.backward()
but at first I couldn't find a backward() anywhere in the loss classes, which seemed odd. It turns out the value a loss returns is a Tensor, and Tensor has a backward() method that dispatches to the backward() in torch.autograd.
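A minimal sketch illustrating this (the tensor names here are illustrative, not from the original):
import torch
import torch.nn as nn

loss_fn = nn.L1Loss()
pred = torch.randn(3, 5, requires_grad=True)  # hypothetical predictions
target = torch.randn(3, 5)                    # hypothetical targets

loss = loss_fn(pred, target)
print(type(loss))  # <class 'torch.Tensor'> -- the loss is just a tensor
loss.backward()    # Tensor.backward() dispatches to torch.autograd.backward()
print(pred.grad.shape)  # gradients have flowed back to the leaf tensor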
1.1 L1Loss
\ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad l_n = \left| x_n - y_n \right|,
Here N is the batch size. If reduction is not set, it defaults to 'mean':
\ell(x, y) = \begin{cases} \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\ \operatorname{sum}(L), & \text{if reduction} = \text{'sum'.} \end{cases}
import torch
import torch.nn as nn

# Hand-rolled L1 loss: mean of the absolute element-wise differences
def validate_loss(input, target):
    return torch.mean(torch.abs(input - target))

loss = nn.L1Loss()
input = torch.randn(3, 5, requires_grad=True)
target = torch.randn(3, 5)
output = loss(input, target)
print("default loss:", output)
output = validate_loss(input, target)
print("validate loss:", output)
1.2 NLLLoss (multi-class classification)
Reference: https://zhuanlan.zhihu.com/p/338318581
\ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad l_n = - w_{y_n} x_{n,y_n}, \quad w_{c} = \text{weight}[c] \cdot \mathbb{1}\{c \not= \text{ignore\_index}\}
Here x is the input, y is the label, and w is the per-class weight.
\ell(x, y) = \begin{cases} \sum_{n=1}^N \frac{1}{\sum_{n=1}^N w_{y_n}} l_n, & \text{if reduction} = \text{'mean';}\\ \sum_{n=1}^N l_n, & \text{if reduction} = \text{'sum'.} \end{cases}
# Hand-rolled NLL: pick each sample's log-probability at its target class,
# negate, and average over the batch
def validate_loss(input, target):
    val = 0
    for li_x, li_y in zip(input, target):
        val += li_x[li_y]
    return -val / len(target)

loss = nn.NLLLoss()
m = nn.LogSoftmax(dim=1)
input = torch.randn(3, 5, requires_grad=True)
target = torch.tensor([1, 0, 4])
output = loss(m(input), target)
print("default loss:", output)
output = validate_loss(m(input), target)
print("validate loss:", output)
# 2D loss example (used, for example, with image inputs)
# Hand-rolled version: for each sample, look up the log-probability of the
# target class at every spatial location, then negate and average over all of them
def validate_loss(input, target):
    val = 0
    for li_x, li_y in zip(input, target):
        dim0, dim1 = li_y.shape
        li_x = li_x.tolist()
        li_y = li_y.tolist()
        # walk over the spatial grid
        for i in range(dim0):
            for j in range(dim1):
                element = li_y[i][j]        # target class at (i, j)
                val += li_x[element][i][j]  # its log-probability
    return -val / target.numel()            # N * 8 * 8 = 320 elements

N, C = 5, 4
loss = nn.NLLLoss()
# input is of size N x C x height x width
data = torch.randn(N, 16, 10, 10)
conv = nn.Conv2d(16, C, (3, 3))
m = nn.LogSoftmax(dim=1)
# each element in target has to have 0 <= value < C
target = torch.empty(N, 8, 8, dtype=torch.long).random_(0, C)
input = m(conv(data))
output = loss(input, target)
print("default loss:", output)
output = validate_loss(input, target)
print("validate loss:", output)
1.3 PoissonNLLLoss
Negative log-likelihood loss for targets that follow a Poisson distribution; the network's output is taken as the Poisson rate parameter λ.
\text{target} \sim \mathrm{Poisson}(\text{input})
\text{loss}(\text{input}, \text{target}) = \text{input} - \text{target} * \log(\text{input}) + \log(\text{target!})
Note that this is the form for log_input=False; with the default log_input=True the input is interpreted as log λ and the computed loss is exp(input) − target * input. The constant log(target!) term is only approximated (via Stirling's formula) when full=True.
loss = nn.PoissonNLLLoss()  # log_input defaults to True
log_input = torch.randn(5, 2, requires_grad=True)  # interpreted as log(lambda)
target = torch.randn(5, 2)
output = loss(log_input, target)
output.backward()
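For parity with the other sections, a minimal hand-rolled check; this assumes the defaults log_input=True and full=False, under which PyTorch computes exp(input) − target * input:
def validate_loss(log_input, target):
    # with log_input=True: loss = exp(input) - target * input, averaged
    return torch.mean(torch.exp(log_input) - target * log_input)

print("default loss:", loss(log_input, target))
print("validate loss:", validate_loss(log_input, target))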
1.4 KLDivLoss
l(x,y) = L = \{ l_1,\dots,l_N \}, \quad l_n = y_n \cdot \left( \log y_n - x_n \right)
Here x is expected to contain log-probabilities and y probabilities (with the default log_target=False).
\ell(x, y) = \begin{cases} \operatorname{mean}(L), & \text{if reduction} = \text{'mean';} \\ \operatorname{sum}(L), & \text{if reduction} = \text{'sum'.} \end{cases}
import torch
import torch.nn as nn
import math

# Hand-rolled KL divergence: l_n = y_n * (log y_n - x_n), averaged over
# every element (this matches reduction="mean", the pointwise mean)
def validate_loss(output, target):
    val = 0
    for li_x, li_y in zip(output, target):
        for x, y in zip(li_x, li_y):
            val += y * (math.log(y) - x)
    return val / output.nelement()

torch.manual_seed(20)
loss = nn.KLDivLoss()
input = torch.Tensor([[-2, -6, -8], [-7, -1, -2], [-1, -9, -2.3], [-1.9, -2.8, -5.4]])
target = torch.Tensor([[0.8, 0.1, 0.1], [0.1, 0.7, 0.2], [0.5, 0.2, 0.3], [0.4, 0.3, 0.3]])
output = loss(input, target)
print("default loss:", output)
output = validate_loss(input, target)
print("validate loss:", output)

loss = nn.KLDivLoss(reduction="batchmean")  # sum / batch size -- the true KL divergence
output = loss(input, target)
print("batchmean loss:", output)
loss = nn.KLDivLoss(reduction="mean")       # sum / number of elements
output = loss(input, target)
print("mean loss:", output)
loss = nn.KLDivLoss(reduction="none")       # element-wise losses, no reduction
output = loss(input, target)
print("none loss:", output)
1.5 MSELoss
\ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad l_n = \left( x_n - y_n \right)^2,
\ell(x, y) = \begin{cases} \operatorname{mean}(L), & \text{if reduction} = \text{'mean';}\\ \operatorname{sum}(L), & \text{if reduction} = \text{'sum'.} \end{cases}
# Hand-rolled MSE: mean of the squared element-wise differences
def validate_loss(input, target):
    return torch.mean(torch.pow(input - target, 2))

loss = nn.MSELoss()
input = torch.randn(3, 5, requires_grad=True)
target = torch.randn(3, 5)
output = loss(input, target)
output.backward()
print("default loss:", output)
output = validate_loss(input, target)
print("validate loss:", output)
1.6 BCELoss
\ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad l_n = - w_n \left[ y_n \cdot \log x_n + (1 - y_n) \cdot \log (1 - x_n) \right],
# Hand-rolled binary cross-entropy (w_n = 1); the input must already lie in (0, 1)
def validate_loss(input, target):
    return -torch.mean(target * torch.log(input) + (1 - target) * torch.log(1 - input))

m = nn.Sigmoid()
loss = nn.BCELoss()
input = torch.randn(3, requires_grad=True)
target = torch.empty(3).random_(2)
output = loss(m(input), target)
print("default loss:", output)
output = validate_loss(m(input), target)
print("validate loss:", output)
1.7 BCEWithLogitsLoss
\ell(x, y) = L = \{l_1,\dots,l_N\}^\top, \quad l_n = - w_n \left[ y_n \cdot \log \sigma(x_n) + (1 - y_n) \cdot \log (1 - \sigma(x_n)) \right],
For the multi-label case, with a per-class positive weight p_c:
\ell_c(x, y) = L_c = \{l_{1,c},\dots,l_{N,c}\}^\top, \quad l_{n,c} = - w_{n,c} \left[ p_c y_{n,c} \cdot \log \sigma(x_{n,c}) + (1 - y_{n,c}) \cdot \log (1 - \sigma(x_{n,c})) \right],
# BCEWithLogitsLoss = Sigmoid + BCELoss in one numerically stable step;
# the hand-rolled check applies the sigmoid manually
def validate_loss(input, target):
    return -torch.mean(target * torch.log(input) + (1 - target) * torch.log(1 - input))

target = torch.ones([10, 64], dtype=torch.float32)  # 64 classes, batch size = 10
output = torch.full([10, 64], 1.5)                  # a prediction (logit)
m = nn.Sigmoid()
pos_weight = torch.ones([64])                       # all positive weights equal to 1
criterion = torch.nn.BCEWithLogitsLoss(pos_weight=pos_weight)
loss = criterion(output, target)                    # -log(sigmoid(1.5))
print("default loss:", loss)
val_loss = validate_loss(m(output), target)
print("validate loss:", val_loss)
1.8 HingeEmbeddingLoss
l_n = \begin{cases} x_n, & \text{if}\; y_n = 1,\\ \max \{0, \Delta - x_n\}, & \text{if}\; y_n = -1, \end{cases}
Here y takes values in {1, −1} and Δ is the margin (default 1); see the sketch below.
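A minimal usage sketch, assuming the default margin Δ = 1.0 and mean reduction:
loss = nn.HingeEmbeddingLoss()  # margin (Δ) defaults to 1.0
input = torch.randn(3, 5, requires_grad=True)
target = torch.randint(0, 2, (3, 5)).float() * 2 - 1  # random labels in {-1, 1}

# Hand-rolled version of the formula above, with mean reduction
def validate_loss(input, target, delta=1.0):
    return torch.mean(torch.where(target == 1, input, torch.clamp(delta - input, min=0)))

print("default loss:", loss(input, target))
print("validate loss:", validate_loss(input, target))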
1.9 MultiLabelMarginLoss
\text{loss}(x, y) = \sum_{ij}\frac{\max(0, 1 - (x[y[j]] - x[i]))}{\text{x.size}(0)}
where j indexes the target classes listed in y (up to the first -1) and i indexes the non-target classes.
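A minimal sketch (this mirrors the example in the PyTorch docs; the entries of y before the first -1 are the positive classes, here 3 and 0):
loss = nn.MultiLabelMarginLoss()
x = torch.FloatTensor([[0.1, 0.2, 0.4, 0.8]])
# target classes are 3 and 0; the -1 terminates the label list
y = torch.LongTensor([[3, 0, -1, 1]])
output = loss(x, y)
print(output)  # (0.4 + 0.6 + 1.1 + 1.3) / 4 = 0.85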
1.10 SmoothL1Loss
\text{loss}(x, y) = \frac{1}{n} \sum_{i} z_{i}
z_{i} = \begin{cases} 0.5 (x_i - y_i)^2 / \beta, & \text{if } |x_i - y_i| < \beta \\ |x_i - y_i| - 0.5 \beta, & \text{otherwise} \end{cases}
beta is an optional parameter that defaults to 1.
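A minimal usage sketch with a hand-rolled check, assuming the default beta = 1.0:
# Hand-rolled Smooth L1: quadratic below beta, linear above it
def validate_loss(input, target, beta=1.0):
    diff = torch.abs(input - target)
    z = torch.where(diff < beta, 0.5 * diff ** 2 / beta, diff - 0.5 * beta)
    return torch.mean(z)

loss = nn.SmoothL1Loss()
input = torch.randn(3, 5, requires_grad=True)
target = torch.randn(3, 5)
print("default loss:", loss(input, target))
print("validate loss:", validate_loss(input, target))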
1.11 CrossEntropyLoss
\text{loss}(x, class) = -\log\left(\frac{\exp(x[class])}{\sum_j \exp(x[j])}\right) = -x[class] + \log\left(\sum_j \exp(x[j])\right)
With a per-class weight:
\text{loss}(x, class) = weight[class] \left(-x[class] + \log\left(\sum_j \exp(x[j])\right)\right)
# CrossEntropyLoss is LogSoftmax followed by NLLLoss, so the hand-rolled
# check applies LogSoftmax and averages the negated target log-probabilities
def validate_loss(input, target):
    val = 0
    for li_x, li_y in zip(input, target):
        val += li_x[li_y]
    return -val / len(target)

loss = nn.CrossEntropyLoss()
input = torch.randn(3, 5, requires_grad=True)
target = torch.empty(3, dtype=torch.long).random_(5)
output = loss(input, target)
print("default loss:", output)
m = nn.LogSoftmax(dim=1)
val_loss = validate_loss(m(input), target)
print("validate loss:", val_loss)