Torch代码:
这是 torchvision 代码的官方实现
https://pytorch.org/vision/main/_modules/torchvision/ops/stochastic_depth.html#stochastic_depth
import torch
import torch.nn as nn
def stochastic_depth(input: torch.Tensor, p: float, mode: str = "row", training: bool = True) -> torch.Tensor:
    """Randomly zero whole residual branches (Stochastic Depth).

    Implements `"Deep Networks with Stochastic Depth"
    <https://arxiv.org/abs/1603.09382>`_, used to randomly drop residual
    branches of residual architectures.

    Args:
        input (Tensor[N, ...]): batched input; dimension 0 is the batch.
        p (float): probability that a row (or the whole batch) is zeroed.
        mode (str): ``"batch"`` zeroes the entire input at once; ``"row"``
            zeroes independently selected rows of the batch.
        training (bool): stochastic depth is applied only when ``True``.

    Returns:
        Tensor[N, ...]: the randomly zeroed tensor, rescaled by
        ``1 / (1 - p)`` so the expected activation is unchanged.

    Raises:
        ValueError: if ``p`` is outside ``[0, 1]`` or ``mode`` is invalid.
    """
    if p < 0.0 or 1.0 < p:
        raise ValueError(f"drop probability has to be between 0 and 1, but got {p}")
    if mode not in ("batch", "row"):
        raise ValueError(f"mode has to be either 'batch' or 'row', but got {mode}")
    if not training or p == 0.0:
        return input

    survival_rate = 1.0 - p
    # One Bernoulli draw per row in "row" mode, a single scalar draw in
    # "batch" mode; trailing singleton dims broadcast over feature dims.
    if mode == "row":
        mask_shape = [input.shape[0]] + [1] * (input.ndim - 1)
    else:
        mask_shape = [1] * input.ndim
    mask = torch.empty(mask_shape, dtype=input.dtype, device=input.device).bernoulli_(survival_rate)
    if survival_rate > 0.0:
        # Inverted scaling keeps E[output] == input during training.
        mask.div_(survival_rate)
    return input * mask
if __name__ == "__main__":
    # Smoke test. NOTE: the old `torch.Tensor(4, 20)` constructor returns
    # *uninitialized* memory (arbitrary values, possibly NaN/inf) and is
    # deprecated; use randn for a well-defined random input instead.
    x = torch.randn(4, 20)
    out = stochastic_depth(x, 0.2)
    assert out.shape == x.shape
class StochasticDepth(nn.Module):
    """Module wrapper around :func:`stochastic_depth`.

    Stores the drop probability and mode, and forwards the module's own
    ``training`` flag to the functional form at call time.
    """

    def __init__(self, p: float, mode: str) -> None:
        super().__init__()
        self.p = p          # drop probability, forwarded unchanged
        self.mode = mode    # "batch" or "row"

    def forward(self, input: torch.Tensor) -> torch.Tensor:
        # self.training toggles the op together with train()/eval().
        return stochastic_depth(input, self.p, self.mode, self.training)

    def __repr__(self) -> str:
        return f"{self.__class__.__name__}(p={self.p}, mode={self.mode})"
tensorflow 版本:
摘自:
https://github.com/tensorflow/tpu/blob/master/models/official/resnet/resnet_layers.py
import tensorflow as tf
def drop_connect(inputs, is_training, drop_connect_rate):
    """Apply drop connect (per-sample stochastic depth).

    Args:
        inputs: `Tensor` input tensor; dimension 0 is the batch dimension.
        is_training: `bool` if True, the model is in training mode.
        drop_connect_rate: `float` probability of dropping a sample's branch;
            may be None or 0 to disable.

    Returns:
        A tensor with the same shape as `inputs`: each sample is either kept
        (and rescaled by ``1 / keep_prob``) or zeroed entirely.
    """
    if not is_training or drop_connect_rate is None or drop_connect_rate == 0:
        return inputs

    keep_prob = 1.0 - drop_connect_rate
    batch_size = inputs.shape[0]
    # BUGFIX: the noise shape was hard-coded to [N, 1, 1, 1], which is only
    # correct for rank-4 (NHWC/NCHW) inputs; for any other rank it silently
    # broadcast to a wrong output shape. Use one singleton dim per non-batch
    # axis so the mask broadcasts correctly for any rank (identical behavior
    # for the original 4-D case).
    noise_shape = [batch_size] + [1] * (len(inputs.shape) - 1)
    # floor(keep_prob + U[0, 1)) is 1 with probability keep_prob, else 0.
    random_tensor = keep_prob + tf.random.uniform(noise_shape, dtype=inputs.dtype)
    binary_tensor = tf.floor(random_tensor)
    # Inverted scaling keeps the expected activation unchanged in training.
    return tf.divide(inputs, keep_prob) * binary_tensor
# Smoke test. Use a 4-D (e.g. NHWC) tensor: drop_connect's noise mask is
# [N, 1, 1, 1], so a 2-D input would broadcast to a wrong-shaped result
# instead of raising.
x = tf.random.uniform([4, 10, 10, 3])
out = drop_connect(x, True, 0.1)
assert out.shape == x.shape
Drop Connect 和 Stochastic Depth 是同一个东西吗?为什么这两者的实现是一样的?
关于这二者名字的讨论,可以看torch的另一个实现,这是timm库的实现:
摘自:
https://github.com/huggingface/pytorch-image-models/blob/main/timm/layers/drop.py#L137
def drop_path(x, drop_prob: float = 0., training: bool = False, scale_by_keep: bool = True):
    """Per-sample stochastic depth: zero a sample's residual path with
    probability ``drop_prob`` during training.

    Equivalent to the "DropConnect" helper from the TPU EfficientNet code,
    renamed because "Drop Connect" originally refers to a different technique
    (see https://github.com/tensorflow/tpu/issues/494#issuecomment-532968956).
    When ``scale_by_keep`` is True, surviving samples are rescaled by
    ``1 / (1 - drop_prob)`` so the expected activation is unchanged.
    """
    if drop_prob == 0. or not training:
        return x
    keep_prob = 1 - drop_prob
    # One Bernoulli draw per sample; singleton dims broadcast over every
    # non-batch dimension, so any tensor rank works (not just 2-D/4-D).
    mask_shape = (x.shape[0],) + (1,) * (x.ndim - 1)
    mask = x.new_empty(mask_shape).bernoulli_(keep_prob)
    if scale_by_keep and keep_prob > 0.0:
        mask.div_(keep_prob)
    return x * mask
class DropPath(nn.Module):
    """Module wrapper for :func:`drop_path` (per-sample Stochastic Depth)."""

    def __init__(self, drop_prob: float = 0., scale_by_keep: bool = True):
        super().__init__()
        self.drop_prob = drop_prob          # probability of dropping the path
        self.scale_by_keep = scale_by_keep  # rescale survivors by 1/keep_prob

    def forward(self, x):
        # The module's training flag decides whether paths are dropped.
        return drop_path(x, self.drop_prob, self.training, self.scale_by_keep)

    def extra_repr(self):
        p = round(self.drop_prob, 3)
        return f'drop_prob={p:0.3f}'