Torch代码:
这是 torchvision 代码的官方实现
https://pytorch.org/vision/main/_modules/torchvision/ops/stochastic_depth.html#stochastic_depth
import torch
import torch.nn as nn
def stochastic_depth(input: torch.Tensor, p: float, mode: str = "row", training: bool = True) -> torch.Tensor:
    """Randomly zero whole residual branches (Stochastic Depth).

    Implements `"Deep Networks with Stochastic Depth"
    <https://arxiv.org/abs/1603.09382>`_, used to randomly drop residual
    branches of residual architectures.

    Args:
        input (Tensor[N, ...]): batched input; dimension 0 is the batch.
        p (float): probability that a row (or the whole batch) is zeroed.
        mode (str): ``"batch"`` zeroes the entire input at once; ``"row"``
            zeroes independently selected rows of the batch.
        training (bool): stochastic depth is applied only when ``True``.

    Returns:
        Tensor[N, ...]: the randomly zeroed tensor, rescaled by
        ``1 / (1 - p)`` so the expected activation is unchanged.

    Raises:
        ValueError: if ``p`` is outside ``[0, 1]`` or ``mode`` is invalid.
    """
    if p < 0.0 or 1.0 < p:
        raise ValueError(f"drop probability has to be between 0 and 1, but got {p}")
    if mode not in ("batch", "row"):
        raise ValueError(f"mode has to be either 'batch' or 'row', but got {mode}")
    if not training or p == 0.0:
        return input

    survival_rate = 1.0 - p
    # One Bernoulli draw per row in "row" mode, a single scalar draw in
    # "batch" mode; trailing singleton dims broadcast over feature dims.
    if mode == "row":
        mask_shape = [input.shape[0]] + [1] * (input.ndim - 1)
    else:
        mask_shape = [1] * input.ndim
    mask = torch.empty(mask_shape, dtype=input.dtype, device=input.device).bernoulli_(survival_rate)
    if survival_rate > 0.0:
        # Inverted scaling keeps E[output] == input during training.
        mask.div_(survival_rate)
    return input * mask
if __name__ == "__main__":
    # Smoke test. NOTE: the old `torch.Tensor(4, 20)` constructor returns
    # *uninitialized* memory (arbitrary values, possibly NaN/inf) and is
    # deprecated; use randn for a well-defined random input instead.
    x = torch.randn(4, 20)
    out = stochastic_depth(x, 0.2)
    assert out.shape == x.shape
class StochasticDepth(nn.Module):
    """Module wrapper around :func:`stochastic_depth`.

    Stores the drop probability and mode, and forwards the module's own
    ``training`` flag to the functional form at call time.
    """

    def __init__(self, p: float, mode: str) -> None:
        super().__init__()
        self.p = p          # drop probability, forwarded unchanged
        self.mode = mode    # "batch" or "row"

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        # self.training toggles the op together with train()/eval().
        return stochastic_depth(input, self.p, self.mode, self.training)

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(p={self.p}, mode={self.mode})"
tensorflow 版本:
摘自:
https://github.com/tensorflow/tpu/blob/master/models/official/resnet/resnet_layers.py
import tensorflow as tf
def drop_connect(inputs, is_training, drop_connect_rate):
    """Apply drop connect (per-sample stochastic depth).

    Args:
        inputs: `Tensor` input tensor; dimension 0 is the batch dimension.
        is_training: `bool` if True, the model is in training mode.
        drop_connect_rate: `float` probability of dropping a sample's branch;
            may be None or 0 to disable.

    Returns:
        A tensor with the same shape as `inputs`: each sample is either kept
        (and rescaled by ``1 / keep_prob``) or zeroed entirely.
    """
    if not is_training or drop_connect_rate is None or drop_connect_rate == 0:
        return inputs

    keep_prob = 1.0 - drop_connect_rate
    batch_size = inputs.shape[0]
    # BUGFIX: the noise shape was hard-coded to [N, 1, 1, 1], which is only
    # correct for rank-4 (NHWC/NCHW) inputs; for any other rank it silently
    # broadcast to a wrong output shape. Use one singleton dim per non-batch
    # axis so the mask broadcasts correctly for any rank (identical behavior
    # for the original 4-D case).
    noise_shape = [batch_size] + [1] * (len(inputs.shape) - 1)
    # floor(keep_prob + U[0, 1)) is 1 with probability keep_prob, else 0.
    random_tensor = keep_prob + tf.random.uniform(noise_shape, dtype=inputs.dtype)
    binary_tensor = tf.floor(random_tensor)
    # Inverted scaling keeps the expected activation unchanged in training.
    return tf.divide(inputs, keep_prob) * binary_tensor
# Smoke test. Use a 4-D (e.g. NHWC) tensor: drop_connect's noise mask is
# [N, 1, 1, 1], so a 2-D input would broadcast to a wrong-shaped result
# instead of raising.
x = tf.random.uniform([4, 10, 10, 3])
out = drop_connect(x, True, 0.1)
assert out.shape == x.shape
Drop Connect 和 Stochastic Depth 是同一个东西吗?为什么这两者的实现是一样的?
关于这二者名字的讨论,可以看torch的另一个实现,这是timm库的实现:
摘自:
https://github.com/huggingface/pytorch-image-models/blob/main/timm/layers/drop.py#L137
def drop_path(x, drop_prob: float = 0., training: bool = False, scale_by_keep: bool = True):
    """Per-sample stochastic depth: zero a sample's residual path with
    probability ``drop_prob`` during training.

    Equivalent to the "DropConnect" helper from the TPU EfficientNet code,
    renamed because "Drop Connect" originally refers to a different technique
    (see https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956).
    When ``scale_by_keep`` is True, surviving samples are rescaled by
    ``1 / (1 - drop_prob)`` so the expected activation is unchanged.
    """
    if drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    # One Bernoulli draw per sample; singleton dims broadcast over every
    # non-batch dimension, so any tensor rank works (not just 2-D/4-D).
    mask_shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    mask = x.new_empty(mask_shape).bernoulli_(keep_prob)
    if scale_by_keep and keep_prob > 0.0:
        mask.div_(keep_prob)
    return x * mask
class DropPath(nn.Module):
    """Module wrapper for :func:`drop_path` (per-sample Stochastic Depth)."""

    def __init__(self, drop_prob: float = 0., scale_by_keep: bool = True):
        super().__init__()
        self.drop_prob = drop_prob          # probability of dropping the path
        self.scale_by_keep = scale_by_keep  # rescale survivors by 1/keep_prob

    def forward(self, x):
        # The module's training flag decides whether paths are dropped.
        return drop_path(x, self.drop_prob, self.training, self.scale_by_keep)

    def extra_repr(self):
        p = round(self.drop_prob, 3)
        return f'drop_prob={p:0.3f}'