# 适用于稀疏向量、独热编码数据的损失函数回顾和PyTorch实现

• CosineEmbeddingLoss
• Sørensen–Dice Coefficient Loss
• Multi-Task Learning Losses of Individual OHE Components

本文将解决上述挑战，并给出在PyTorch中实现这些损失函数的代码。

## 独热编码数据

from sklearn.preprocessing import OneHotEncoder
import numpy as np# Instantiate a column of 10 random integers from 5 classes
x = np.random.randint(5, size=10).reshape(-1,1)print(x)
>>> [[2][3][2][2][1][1][4][1][0][4]]# Instantiate OHE() + Fit/Transform the data
ohe_encoder = OneHotEncoder(categories="auto")
encoded = ohe_encoder.fit_transform(x).todense()print(encoded)
>>> matrix([[0., 1., 0., 0., 0.],
[0., 0., 0., 1., 0.],
[0., 0., 1., 0., 0.],
[0., 0., 0., 1., 0.],
[0., 0., 1., 0., 0.],
[1., 0., 0., 0., 0.],
[0., 0., 1., 0., 0.],
[0., 0., 1., 0., 0.],
[0., 0., 0., 1., 0.],
[0., 0., 0., 0., 1.]])print(list(ohe_encoder.get_feature_names()))
>>> ["x0_0", "x0_1", "x0_2", "x0_3", "x0_4"]


## Autoencoders

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optimclass Network(nn.Module):
def __init__(self, input_shape: int):
super().__init__()
self.encode1 = nn.Linear(input_shape, 500)
self.encode2 = nn.Linear(500, 250)
self.encode3 = nn.Linear(250, 50)

self.decode1 = nn.Linear(50, 250)
self.decode2 = nn.Linear(250, 500)
self.decode3 = nn.Linear(500, input_shape)   def encode(self, x: torch.Tensor):
x = F.relu(self.encode1(x))
x = F.relu(self.encode2(x))
x = F.relu(self.encode3(x))
return x   def decode(self, x: torch.Tensor):
x = F.relu(self.decode1(x))
x = F.relu(self.decode2(x))
x = F.relu(self.decode3(x))
return x   def forward(self, x: torch.Tensor):
x = encode(x)
x = decode(x)
return x
def train_model(data: pd.DataFrame):
net = Network()
optimizer = optim.Adagrad(net.parameters(), lr=1e-3, weight_decay=1e-4)
losses = []   for epoch in range(250):
for batch in get_batches(data)
net.zero_grad()

# Pass batch through
output = net(batch)

# Get Loss + Backprop
loss = loss_fn(output, batch).sum() #
losses.append(loss)
loss.backward()
optimizer.step()
return net, losses


## 损失函数的问题

• 某一列为1意味着同组其余的OHE列必须为0，即各列并非相互独立
• OHE向量输入的稀疏性会导致系统选择简单地将大多数列返回0以减少误差

## 余弦嵌入损失

loss_function = torch.nn.CosineEmbeddingLoss(reduction='none')# . . . Then during training . . . loss = loss_function(reconstructed, input_data).sum()
loss.backward()


## Dice Loss

Dice Loss实现了Sørensen–Dice系数[2]，该系数在计算机视觉领域的分割任务中非常受欢迎。简单地说，它度量两个集合之间的重叠程度，并与两个向量之间的Jaccard距离相关。Dice系数对向量中各列取值的差异高度敏感，利用这种敏感性可以有效地区分图像中像素的边缘，因此它在图像分割中非常流行。Dice Loss为:

PyTorch没有内置的Dice Loss实现。但是在Kaggle的损失函数库——Keras & PyTorch[3]中可以找到一个很好的实现:

class DiceLoss(nn.Module):
def __init__(self, weight=None, size_average=True):
super(DiceLoss, self).__init__()

def forward(self, inputs, targets, smooth=1):

#comment out if your model contains a sigmoid acitvation
inputs = F.sigmoid(inputs)

#flatten label and prediction tensors
inputs = inputs.view(-1)
targets = targets.view(-1)

intersection = (inputs * targets).sum()
dice = (2.*intersection + smooth)/
(inputs.sum() + targets.sum() + smooth)

return 1 - dice


## 不同OHE列的单个损失函数

from torch.nn.modules import _Loss
from torch import argmaxclass CustomLoss(_Loss):
def __init__(self):
super(CustomLoss, self).__init__()  def forward(self, input, target):
""" loss function called at runtime """

# Class 1 - Indices [0:50]
class_1_loss = F.nll_loss(
F.log_softmax(input[:, 0:50], dim=1),
argmax(target[:, 0:50])
)    # Class 2 - Indices [50:100]
class_2_loss = F.nll_loss(
F.log_softmax(input[:, 50:100], dim=1),
argmax(target[:, 50:100])
)    # Class 3 - Indices [100:150]
class_3_loss = F.nll_loss(
F.log_softmax(input[:, 100:150], dim=1),
argmax(target[:, 100:150])
)    return class_1_loss + class_2_loss + class_3_loss


## 资源

1. D.E. Rumelhart, G.E. Hinton, and R.J. Williams, “Learning internal representations by error propagation.” Parallel Distributed Processing. Vol 1: Foundations. MIT Press, Cambridge, MA, 1986.
2. Sørensen, T. (1948). “A method of establishing groups of equal amplitude in plant sociology based on similarity of species and its application to analyses of the vegetation on Danish commons”. Kongelige Danske Videnskabernes Selskab. 5 (4): 1–34. *AND* Dice, Lee R. (1945). “Measures of the Amount of Ecologic Association Between Species”. Ecology. 26 (3): 297–302.
3. Kaggle’s Loss Function Library: https://www.kaggle.com/bigironsphere/loss-function-library-keras-pytorch

deephub翻译组

• 点赞
• 评论
• 分享
x

海报分享

扫一扫，分享海报

• 收藏
• 手机看

分享到微信朋友圈

x

扫一扫，手机阅读

• 打赏

打赏

deephub

你的鼓励将是我创作的最大动力

C币 余额
2C币 4C币 6C币 10C币 20C币 50C币
• 一键三连

点赞Mark关注该博主, 随时了解TA的最新博文
04-07 268

04-13 6212
03-13 697
02-24 964
01-18
01-16 202
01-29 3751
©️2020 CSDN 皮肤主题: 技术工厂 设计师:CSDN官方博客