batch_norm, instance_norm, layer_norm, and group_norm in PyTorch

(Figure: each variant computes the mean and variance over the blue-shaded block of the input tensor.)

y = \frac{x-E[x]}{\sqrt{Var[x]+\epsilon }} \times \gamma+\beta
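
All four variants share this affine normalization; they differ only in which dimensions the mean and variance are computed over. A minimal sketch of that shared step (the helper name normalize and the choice of dims are illustrative, not part of PyTorch's API):

    import torch

    def normalize(x, dims, gamma, beta, eps=1e-5):
        # statistics over the chosen dims, broadcast back against x;
        # gamma and beta are assumed to already broadcast against x
        mean = x.mean(dim=dims, keepdim=True)
        var = x.var(dim=dims, keepdim=True, unbiased=False)
        return (x - mean) / torch.sqrt(var + eps) * gamma + beta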

1 BatchNorm

1.1 Implementation

 Batch Normalization: Accelerating Deep Network Training by Reducing Internal Covariate Shift

http://d2l.ai/chapter_convolutional-modern/batch-norm.html?highlight=batchnorm2d

x \in \mathbb{R}^{B \times C \times H \times W}

y_{tcij} = \frac{x_{tcij}-\mu_{c}}{\sqrt{\sigma_c^2+\epsilon }}*\gamma_c+\beta_c

\mu_c=\frac{1}{BHW}\sum_{t=1}^B\sum_{i=1}^{H}\sum_{j=1}^W x_{tcij}

\sigma_c^2=\frac{1}{BHW}\sum_{t=1}^B\sum_{i=1}^{H}\sum_{j=1}^W (x_{tcij}-\mu_c)^2

    import torch
    import torch.nn.functional as F

    # batch norm
    # http://d2l.ai/chapter_convolutional-modern/batch-norm.html?highlight=batchnorm2d

    def batch_norm(input, running_mean, running_var, weight, bias, training=True, momentum=0.1, eps=1e-5):
        # reduce over every dimension except the channel dimension (dim 1)
        reduced_dim = [i for i in range(input.dim()) if i != 1]
        # [1, C, 1, ...]: broadcast shape for the per-channel parameters and running stats
        normalized_shape = [1]*len(input.shape)
        normalized_shape[1] = input.shape[1]
        if training:
            mean = input.mean(dim=reduced_dim, keepdim=True)
            var = input.var(dim=reduced_dim, keepdim=True, unbiased=False)   # biased variance for normalization
            mean_update = mean.squeeze()
            var_update = input.var(dim=reduced_dim, unbiased=True)           # unbiased variance for the running estimate
            x_hat = (input-mean)/torch.sqrt(var+eps)
            running_mean.data = (1-momentum)*running_mean+momentum*mean_update
            running_var.data = (1-momentum)*running_var+momentum*var_update
        else:
            x_hat = (input - running_mean.view(*normalized_shape)) / torch.sqrt(running_var.view(*normalized_shape) + eps)
        x = x_hat*weight.view(*normalized_shape)+bias.view(*normalized_shape)
        return x

1.2 Test

    input = torch.randn(100, 8, 6) # [B, D, *]
    num_features = 8
    weight = torch.randn(num_features)
    bias = torch.randn(num_features)

    running_mean = torch.randn(num_features)
    running_var = torch.randn(num_features).abs()
    running_mean0 = torch.clone(running_mean)
    running_var0 = torch.clone(running_var)
    running_mean1 = torch.clone(running_mean)
    running_var1 = torch.clone(running_var)

    training = True
    ans0 = F.batch_norm(input, running_mean0, running_var0, weight, bias, training=training)
    ans1 = batch_norm(input, running_mean1, running_var1, weight, bias, training=training)
    assert torch.square(ans0-ans1).sum()<1e-5
    assert torch.square(running_mean0-running_mean1).sum()<1e-5
    assert torch.square(running_var0-running_var1).sum()<1e-5

    training = False
    ans0 = F.batch_norm(input, running_mean0, running_var0, weight, bias, training=training)
    ans1 = batch_norm(input, running_mean1, running_var1, weight, bias, training=training)
    assert torch.square(ans0-ans1).sum()<1e-5
    assert torch.square(running_mean0-running_mean1).sum()<1e-5
    assert torch.square(running_var0-running_var1).sum()<1e-5
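
For reference, the module interface nn.BatchNorm1d wraps the same computation for a [B, C, L] input; a minimal sketch, assuming the default momentum=0.1 and eps=1e-5 and copying in the affine parameters used above:

    bn = torch.nn.BatchNorm1d(num_features)
    with torch.no_grad():
        bn.weight.copy_(weight)
        bn.bias.copy_(bias)
    bn.train()
    out_train = bn(input)   # normalizes with batch statistics, updates bn.running_mean / bn.running_var
    bn.eval()
    out_eval = bn(input)    # normalizes with the accumulated running statistics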

2 InstanceNorm

Instance Normalization: The Missing Ingredient for Fast Stylization

Texture Networks

2.1 Implementation

x \in \mathbb{R}^{B \times C \times H \times W}

 y_{tcij} = \frac{x_{tcij}-\mu_{tc}}{\sqrt{\sigma_{tc}^2+\epsilon }}*\gamma_c+\beta_c

 \mu_{tc}=\frac{1}{HW}\sum_{i=1}^{H}\sum_{j=1}^W x_{tcij}

\sigma_{tc}^2=\frac{1}{HW}\sum_{i=1}^{H}\sum_{j=1}^W (x_{tcij}-\mu_{tc})^2

    # instance norm
    # https://github.com/DmitryUlyanov/texture_nets/blob/aad2cc6f8a998fedc77b64bdcfe1e2884aa0fb3e/InstanceNormalization.lua

    def instance_norm(input, running_mean, running_var, weight, bias, training=True, momentum=0.1, eps=1e-5):
        # reduce over the spatial dimensions only, keeping batch (dim 0) and channel (dim 1)
        reduced_dim = [i for i in range(input.dim()) if i not in [0, 1]]
        # [1, C, 1, ...]: broadcast shape for the per-channel parameters and running stats
        normalized_shape = [1]*len(input.shape)
        normalized_shape[1] = input.shape[1]

        # [B, C, 1, ...]: broadcast shape for the per-sample, per-channel statistics
        shape = [1]*len(input.shape)
        shape[:2] = input.shape[:2]

        if training:
            mean = input.mean(dim=reduced_dim)                               # [B, C]
            var = input.var(dim=reduced_dim, unbiased=False)                 # biased variance for normalization
            mean_update = mean.mean(0)                                       # average over the batch -> [C]
            var_update = input.var(dim=reduced_dim, unbiased=True).mean(0)   # unbiased variance for the running estimate

            x_hat = (input-mean.view(*shape))/torch.sqrt(var.view(*shape)+eps)
            running_mean.data = (1-momentum)*running_mean+momentum*mean_update
            running_var.data = (1-momentum)*running_var+momentum*var_update
        else:
            x_hat = (input - running_mean.view(*normalized_shape)) / torch.sqrt(running_var.view(*normalized_shape) + eps)
        x = x_hat*weight.view(*normalized_shape)+bias.view(*normalized_shape)
        return x

2.2 Test

    use_input_stats = True
    ans0 = F.instance_norm(input, running_mean0, running_var0, weight, bias, use_input_stats=use_input_stats)
    ans1 = instance_norm(input, running_mean1, running_var1, weight, bias, training=use_input_stats)
    assert torch.square(ans0-ans1).sum()<1e-5
    assert torch.square(running_mean0-running_mean1).sum()<1e-5
    assert torch.square(running_var0-running_var1).sum()<1e-5

    use_input_stats = False
    ans0 = F.instance_norm(input, running_mean0, running_var0, weight, bias, use_input_stats=use_input_stats)
    ans1 = instance_norm(input, running_mean1, running_var1, weight, bias, training=use_input_stats)
    assert torch.square(ans0-ans1).sum()<1e-5
    assert torch.square(running_mean0-running_mean1).sum()<1e-5
    assert torch.square(running_var0-running_var1).sum()<1e-5
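
The module form nn.InstanceNorm1d computes the same thing, but unlike BatchNorm it defaults to affine=False and track_running_stats=False, so both must be enabled to mirror the functional call above. A sketch under those assumptions:

    inorm = torch.nn.InstanceNorm1d(num_features, affine=True, track_running_stats=True)
    with torch.no_grad():
        inorm.weight.copy_(weight)
        inorm.bias.copy_(bias)
    inorm.train()
    out_train = inorm(input)   # per-sample, per-channel statistics; updates the running buffers
    inorm.eval()
    out_eval = inorm(input)    # uses inorm.running_mean / inorm.running_var instead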

3 LayerNorm

 Layer Normalization

https://stackoverflow.com/questions/59830168/layer-normalization-in-pytorch

3.1 Implementation

x \in \mathbb{R}^{B \times C_1 \times \dots \times C_n}

C_{k:n} = C_k \times C_{k+1} \times \dots \times C_n

normalized\_shape = [C_k, C_{k+1}, \dots, C_n]

Flattening the leading dimensions C_1, \dots, C_{k-1} into C_{1:k-1} and the normalized dimensions into C_{k:n} gives the reshaped view

\hat{x}, \hat{y} \in \mathbb{R}^{B \times C_{1:k-1} \times C_{k:n}}

\hat{y}_{tci} = \frac{\hat{x}_{tci}-\mu_{tc}}{\sqrt{\sigma_{tc}^2+\epsilon}}*\gamma_{i}+\beta_{i}

\mu_{tc}=\frac{1}{C_{k:n}}\sum_{i=1}^{C_{k:n}} \hat{x}_{tci}

\sigma_{tc}^2=\frac{1}{C_{k:n}}\sum_{i=1}^{C_{k:n}} (\hat{x}_{tci}-\mu_{tc})^2

    # layer_norm
    # https://stackoverflow.com/questions/59830168/layer-normalization-in-pytorch
    def layer_norm(input, normalized_shape, weight, bias, eps=1e-5):
        # the trailing dimensions of the input must match normalized_shape
        assert input.shape[-len(normalized_shape):] == tuple(normalized_shape)
        # reduce over the last len(normalized_shape) dimensions
        reduced_dim = list(range(input.dim() - len(normalized_shape), input.dim()))
        mean = input.mean(dim=reduced_dim, keepdim=True)
        var = input.var(dim=reduced_dim, keepdim=True, unbiased=False)
        # weight and bias have shape normalized_shape and broadcast over the leading dims
        return (input - mean) / torch.sqrt(var + eps) * weight + bias

3.2 Test

    input = torch.randn(4, 5, 6)
    normalized_shape = (5, 6)
    weight = torch.randn(*normalized_shape)
    bias = torch.randn(*normalized_shape)

    ans0 = F.layer_norm(input, normalized_shape, weight, bias)
    ans1 = layer_norm(input, normalized_shape, weight, bias)
    assert torch.square(ans0 - ans1).sum() < 1e-5
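
The same check can be run against the module interface nn.LayerNorm, whose affine parameters have shape normalized_shape (a minimal sketch, assuming the default elementwise_affine=True):

    ln = torch.nn.LayerNorm(normalized_shape)
    with torch.no_grad():
        ln.weight.copy_(weight)
        ln.bias.copy_(bias)
    ans2 = ln(input)
    assert torch.square(ans0 - ans2).sum() < 1e-5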

4 GroupNorm

 Group Normalization

4.1 Implementation
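
In the notation of the previous sections, with the C channels split into G = num_groups groups of C/G channels each (mirroring the reshape in the code below):

x \in \mathbb{R}^{B \times G \times C/G \times H \times W}

y_{tgkij} = \frac{x_{tgkij}-\mu_{tg}}{\sqrt{\sigma_{tg}^2+\epsilon}}*\gamma_{gk}+\beta_{gk}

\mu_{tg}=\frac{1}{(C/G)HW}\sum_{k=1}^{C/G}\sum_{i=1}^{H}\sum_{j=1}^W x_{tgkij}

\sigma_{tg}^2=\frac{1}{(C/G)HW}\sum_{k=1}^{C/G}\sum_{i=1}^{H}\sum_{j=1}^W (x_{tgkij}-\mu_{tg})^2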

    def group_norm(input, num_groups, weight, bias, eps=1e-5):
        input_shape = list(input.shape)
        # split the channel dimension into [num_groups, C // num_groups]
        shape = [input_shape[0]]+[num_groups, input_shape[1]//num_groups]+input_shape[2:]
        # [1, C, 1, ...]: broadcast shape for the per-channel affine parameters
        normalized_shape = [1]*len(input.shape)
        normalized_shape[1] = input.shape[1]
        # reduce over the channels within a group and over all spatial dimensions
        reduced_dim = list(range(2, len(shape)))
        input = input.reshape(*shape)
        mean = input.mean(dim=reduced_dim, keepdim=True)
        var = input.var(dim=reduced_dim, keepdim=True, unbiased=False)
        x_hat = (input - mean)/torch.sqrt(var+eps)
        x_hat = x_hat.reshape(*input_shape)
        x = x_hat*weight.view(*normalized_shape)+bias.view(*normalized_shape)
        return x

4.2 Test

    input = torch.randn(100, 8, 4)
    num_features = 8
    num_groups = 2
    weight = torch.randn(num_features)
    bias = torch.randn(num_features)

    ans0 = F.group_norm(input, num_groups, weight, bias)
    ans1 = group_norm(input, num_groups, weight, bias)
    assert torch.square(ans0-ans1).sum()<1e-5
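
As a quick cross-check of how the variants relate: with num_groups=1, group norm computes its statistics over all of (C, *) like a layer norm, and with num_groups equal to the number of channels it matches instance norm. A sketch of the second case (the None running buffers mean no running statistics are involved; this is an illustrative check, not part of the reference implementations above):

    ans_gn = F.group_norm(input, num_features, weight, bias)
    ans_in = F.instance_norm(input, None, None, weight, bias, use_input_stats=True)
    assert torch.square(ans_gn - ans_in).sum() < 1e-5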
