

FLOPS:注意全大写,是floating point operations per second的缩写,指每秒浮点运算次数,理解为计算速度。是一个衡量硬件性能的指标。

FLOPs:注意s小写,是floating point operations的缩写(s表复数),指浮点运算数,理解为计算量。可以用来衡量算法/模型的复杂度。

MACs:乘加运算次数(Multiply-Accumulate operations),1次乘加包含1次乘法和1次加法,相当于2次浮点运算(即 FLOPs ≈ 2 × MACs),硬件支持乘加指令可加快计算速度。

1. conv 计算
def compute_conv2d_flops(mod, input_shape=None, output_shape=None, macs=False):
    """Estimate the per-sample cost of a 2-D convolution.

    Args:
        mod: Convolution module. Reads ``mod.weight.shape`` (expected to follow
            the PyTorch layout ``(out_channels, in_channels // groups,
            kernel_h, kernel_w)``), ``mod.groups`` and ``mod.bias``.
        input_shape: 4-element input shape; element 1 is the channel count.
        output_shape: 4-element output shape; the last two elements are the
            output height and width. Must support ``[1:].numel()`` (e.g.
            ``torch.Size``) when ``macs`` is false and the module has a bias.
        macs: If true, return multiply-accumulate operations and ignore the
            bias; otherwise return FLOPs (two per MAC, plus one addition per
            output element when a bias is present).

    Returns:
        The operation count as an ``int``.

    Raises:
        AssertionError: If the input channel count does not match the weight
            tensor and group count.
    """
    _, cin, _, _ = input_shape
    _, _, h, w = output_shape
    w_cout, w_cin, w_h, w_w = mod.weight.shape

    # The weight already stores the per-group input channel count, so the same
    # formula covers regular, grouped and depthwise convolutions.
    assert cin == w_cin * mod.groups, \
        'input channels do not match weight shape and groups'

    flops = h * w * w_cout * w_cin * w_h * w_w

    if not macs:
        flops_bias = output_shape[1:].numel() if mod.bias is not None else 0
        flops = 2 * flops + flops_bias
    return int(flops)
2. fc 计算
def compute_fc_flops(mod, input_shape=None, output_shape=None, macs=False):
    """Estimate the per-sample cost of a fully connected (linear) layer.

    Only the last element of each shape is used; any leading dimensions are
    ignored.

    Args:
        mod: Linear module; only ``mod.bias`` is read.
        input_shape: Input shape whose last element is the input feature count.
        output_shape: Output shape whose last element is the output feature
            count.
        macs: If true, return multiply-accumulate operations and ignore the
            bias; otherwise return FLOPs (two per MAC, plus one addition per
            output feature when a bias is present).

    Returns:
        The operation count as an ``int``.
    """
    ft_in, ft_out = input_shape[-1], output_shape[-1]
    flops = ft_in * ft_out
    if not macs:
        flops_bias = ft_out if mod.bias is not None else 0
        flops = 2 * flops + flops_bias
    return int(flops)

def compute_bn2d_flops(mod, input_shape=None, output_shape=None, macs=False):
    """Estimate the per-sample cost of a 2-D batch-normalization layer.

    Each element is subtracted, divided, scaled (gamma) and shifted (beta).
    That is counted as two operations per element in MAC mode and four
    otherwise.

    Args:
        mod: Unused; kept so all counters share one signature.
        input_shape: Input shape supporting ``[1:].numel()``; the leading
            element is excluded from the count.
        output_shape: Unused.
        macs: Selects the MAC count instead of the FLOP count.

    Returns:
        The operation count as an ``int``.
    """
    elements = input_shape[1:].numel()
    ops_per_element = 2 if macs else 4
    return int(ops_per_element * elements)
3. relu 计算
def compute_relu_flops(mod, input_shape=None, output_shape=None, macs=False):
    """Estimate the per-sample cost of a ReLU activation.

    Args:
        mod: Unused; kept so all counters share one signature.
        input_shape: Input shape supporting ``[1:].numel()``; only read when
            ``macs`` is false.
        output_shape: Unused.
        macs: If true the result is 0, since ReLU has no multiply-accumulates.

    Returns:
        One operation per input element (excluding the leading dimension),
        or 0 in MAC mode, as an ``int``.
    """
    if macs:
        return int(0)
    return int(input_shape[1:].numel())
4. maxpool 计算
def compute_maxpool2d_flops(mod, input_shape=None, output_shape=None, macs=False):
    """Estimate the per-sample cost of a 2-D max-pooling layer.

    Counts one operation per pooling-window element for every output element.

    Args:
        mod: Pooling module; ``mod.kernel_size`` may be a single int (square
            window) or a ``(height, width)`` pair.
        input_shape: Unused.
        output_shape: Output shape supporting ``[1:].numel()``; only read when
            ``macs`` is false.
        macs: If true the result is 0, since pooling has no
            multiply-accumulates.

    Returns:
        The operation count as an ``int``.
    """
    if macs:
        return 0

    kernel = mod.kernel_size
    if isinstance(kernel, int):
        window = kernel * kernel
    else:
        # A pair describes a rectangular window; squaring it would raise.
        kernel_h, kernel_w = kernel
        window = kernel_h * kernel_w

    return window * output_shape[1:].numel()
5. averagepool 计算
def compute_avgpool2d_flops(mod, input_shape=None, output_shape=None, macs=False):
    """Estimate the per-sample cost of a 2-D average-pooling layer.

    Counts one operation per pooling-window element for every output element.

    Args:
        mod: Pooling module; ``mod.kernel_size`` may be a single int (square
            window) or a ``(height, width)`` pair.
        input_shape: Unused.
        output_shape: Output shape supporting ``[1:].numel()``; only read when
            ``macs`` is false.
        macs: If true the result is 0; pooling is not counted in MAC mode.

    Returns:
        The operation count as an ``int``.
    """
    if macs:
        return 0

    kernel = mod.kernel_size
    if isinstance(kernel, int):
        window = kernel * kernel
    else:
        # A pair describes a rectangular window; squaring it would raise.
        kernel_h, kernel_w = kernel
        window = kernel_h * kernel_w

    return window * output_shape[1:].numel()
6. softmax 计算
def compute_softmax_flops(mod, input_shape=None, output_shape=None, macs=False):
    """Estimate the per-sample cost of a softmax layer.

    With ``n`` features (all dimensions after the first) the count is ``n``
    exponentials plus ``n`` divisions. Outside MAC mode the ``n - 1`` additions
    of the normalizing sum are included as well.

    Args:
        mod: Unused; kept so all counters share one signature.
        input_shape: Input shape supporting ``[1:].numel()``.
        output_shape: Unused.
        macs: If true, leave the additions out of the count.

    Returns:
        The operation count.
    """
    n = input_shape[1:].numel()
    exp_and_div = n + n
    if macs:
        return exp_and_div
    return exp_and_div + (n - 1)
A simplified 3-D Tensor (channels, height, width) for convolutional neural networks.
class Tensor(object):
    """Shape-only stand-in for a feature map: channels, height and width."""

    def __init__(self, c, h, w):
        self.c, self.h, self.w = c, h, w

    def equals(self, other):
        """Return True when all three dimensions match those of ``other``."""
        return all(
            getattr(self, dim) == getattr(other, dim)
            for dim in ('c', 'h', 'w')
        )

    def broadcastable(self, other):
        """Return True when, per dimension, one size divides the other."""
        def divides(a, b):
            return a % b == 0 or b % a == 0

        return divides(self.c, other.c) and \
               divides(self.h, other.h) and \
               divides(self.w, other.w)

Calculate the single-sample inference-time params and FLOPs of a convolutional
neural network with PyTorch-like APIs.
To calculate the params and FLOPs of a given network architecture, subclass
CNNCalculator and define the network in `calculate`, as you would in PyTorch.
For convenience, some basic operators are pre-defined and other modules can be
defined in a similar way. Parameters and FLOPs in Batch Normalization and other
types of layers are also computed. If only Convolutional and Linear layers are
considered, please modify the code.
Refer to `` for details.
class CNNCalculator(object):
    """Accumulate parameter and FLOP counts for a CNN built from layer calls.

    Subclasses override :meth:`calculate` and describe the network by chaining
    the layer methods below. Each layer method takes a ``Tensor`` (a shape
    holder with ``c``, ``h`` and ``w``), adds its cost to ``self.params`` and
    ``self.flops``, and returns the output shape as a new ``Tensor``.

    Convolution and linear layers add one count per multiply-accumulate.
    Activation, pooling and element-wise layers add to ``self.flops`` only
    when ``only_mac`` is false. The ``name`` argument of every layer is
    accepted for readability and is not used in the computation.
    """

    def __init__(self, only_mac=False):
        self.params = 0
        self.flops = 0
        self.only_mac = only_mac

    def calculate(self, *inputs):
        """Describe the network; must be overridden by subclasses."""
        raise NotImplementedError

    @staticmethod
    def _pair(value, message):
        """Normalize an int or 2-element sequence to a ``(h, w)`` tuple."""
        if isinstance(value, int):
            return (value, value)
        if isinstance(value, list):
            value = tuple(value)
        assert isinstance(value, tuple) and len(value) == 2, message
        return value

    @staticmethod
    def _out_len(length, size, stride, padding):
        """Output length of a sliding window along one spatial axis."""
        return (length - size + 2 * padding) // stride + 1

    def _elementwise(self, tensor):
        """Count one operation per element and return a same-shaped tensor."""
        if not self.only_mac:
            self.flops += tensor.c * tensor.h * tensor.w
        return Tensor(tensor.c, tensor.h, tensor.w)

    def Conv2d(self, tensor, out_c, size, stride=1, padding=0, groups=1, bias=True, name='conv'):
        """Add a 2-D convolution and return its output shape.

        ``size``, ``stride`` and ``padding`` may each be an int or an
        ``(h, w)`` pair. Raises AssertionError for malformed pairs or when the
        channel counts are not divisible by ``groups``.
        """
        size_h, size_w = self._pair(size, 'illegal size parameters')
        stride_h, stride_w = self._pair(stride, 'illegal stride parameters')
        padding_h, padding_w = self._pair(padding, 'illegal padding parameters')

        in_c = tensor.c
        out_h = self._out_len(tensor.h, size_h, stride_h, padding_h)
        out_w = self._out_len(tensor.w, size_w, stride_w, padding_w)
        assert in_c % groups == 0 and out_c % groups == 0, 'in_c and out_c must be divisible by groups'

        # Each output channel only sees in_c // groups input channels.
        weights = out_c * (in_c // groups) * size_h * size_w
        self.params += weights
        self.flops += weights * out_h * out_w
        if bias:
            self.params += out_c
            self.flops += out_c * out_w * out_h

        return Tensor(out_c, out_h, out_w)

    def BatchNorm2d(self, tensor, name='batch_norm'):
        """Pass the tensor through unchanged.

        Batch normalization can be combined with the preceding convolution at
        inference time, so it contributes no params and no FLOPs here.
        """
        return tensor

    def ReLU(self, tensor, name='relu'):
        """Add a ReLU: one FLOP per element unless ``only_mac`` is set."""
        return self._elementwise(tensor)

    def Sigmoid(self, tensor, name='sigmoid'):
        """Add a sigmoid: one FLOP per element unless ``only_mac`` is set."""
        return self._elementwise(tensor)

    def Pool2d(self, tensor, size, stride=1, padding=0, name='pool'):
        """Add a pooling layer and return its output shape.

        Counts one FLOP per window element for every output element unless
        ``only_mac`` is set. ``size``, ``stride`` and ``padding`` may each be
        an int or an ``(h, w)`` pair.
        """
        size_h, size_w = self._pair(size, 'illegal size parameters')
        stride_h, stride_w = self._pair(stride, 'illegal stride parameters')
        padding_h, padding_w = self._pair(padding, 'illegal padding parameters')

        out_c = tensor.c
        out_h = self._out_len(tensor.h, size_h, stride_h, padding_h)
        out_w = self._out_len(tensor.w, size_w, stride_w, padding_w)
        if not self.only_mac:
            self.flops += out_c * out_h * out_w * size_h * size_w
        return Tensor(out_c, out_h, out_w)

    def AvgPool2d(self, tensor, size, stride=1, padding=0, name='avg_pool'):
        """Add an average-pooling layer (same cost model as ``Pool2d``)."""
        return self.Pool2d(tensor, size, stride=stride, padding=padding, name=name)

    def MaxPool2d(self, tensor, size, stride=1, padding=0, name='max_pool'):
        """Add a max-pooling layer (same cost model as ``Pool2d``)."""
        return self.Pool2d(tensor, size, stride=stride, padding=padding, name=name)

    def GlobalAvgPool2d(self, tensor, name='global_avg_pool'):
        """Average-pool over the whole spatial extent, giving a 1x1 map."""
        size = (tensor.h, tensor.w)
        return self.AvgPool2d(tensor, size, name=name)

    def GlobalMaxPool2d(self, tensor, name='global_max_pool'):
        """Max-pool over the whole spatial extent, giving a 1x1 map."""
        size = (tensor.h, tensor.w)
        return self.MaxPool2d(tensor, size, name=name)

    def Linear(self, tensor, out_c, name='fully_connected'):
        """Add a fully connected layer on a 1x1 tensor (no bias is counted)."""
        in_c = tensor.c
        out_h = tensor.h
        out_w = tensor.w
        assert out_h == 1 and out_w == 1, 'out_h or out_w is greater than 1 in Linear layer.'
        self.params += in_c * out_c
        self.flops += in_c * out_c
        return Tensor(out_c, out_h, out_w)

    def Concat(self, tensors, name='concat'):
        """Concatenate tensors along channels; spatial sizes must match."""
        out_c = 0
        out_h = tensors[0].h
        out_w = tensors[0].w
        for tensor in tensors:
            assert tensor.h == out_h and tensor.w == out_w, 'tensor dimensions mismatch in Concat layer.'
            out_c += tensor.c
        return Tensor(out_c, out_h, out_w)

    def MultiAdd(self, tensor, other, name='multi_add'):
        """Add an element-wise binary op between two broadcastable tensors.

        The result takes the larger size along every dimension, so the count
        is the same whichever operand is the broadcast one.
        """
        assert tensor.broadcastable(other), 'tensor dimensions mismatch in Add layer.'
        out_c = max(tensor.c, other.c)
        out_h = max(tensor.h, other.h)
        out_w = max(tensor.w, other.w)
        if not self.only_mac:
            self.flops += out_c * out_h * out_w
        return Tensor(out_c, out_h, out_w)

    def Add(self, tensor, other, name='add'):
        """Element-wise addition; see ``MultiAdd``."""
        return self.MultiAdd(tensor, other, name=name)

    def Multi(self, tensor, other, name='multi'):
        """Element-wise multiplication; see ``MultiAdd``."""
        return self.MultiAdd(tensor, other, name=name)

    def SplitBySize(self, tensor, sizes, name='split_by_size'):
        """Split along channels into pieces whose sizes sum to ``tensor.c``."""
        assert sum(sizes) == tensor.c, 'sizes and tensor.c do not match.'
        return [Tensor(c, tensor.h, tensor.w) for c in sizes]






