相关概念
FLOPS:注意全大写,是floating point operations per second的缩写,指每秒浮点运算次数,理解为计算速度。是一个衡量硬件性能的指标。
FLOPs:注意s小写,是floating point operations的缩写(s表复数),指浮点运算数,理解为计算量。可以用来衡量算法/模型的复杂度。
MACs:乘加运算(Multiply-Accumulate operations),相当于2次浮点运算,硬件支持乘加指令可加快计算速度。
OP的计算
1. conv 计算
def compute_conv2d_flops(mod, input_shape=None, output_shape=None, macs=False):
    """Count the operations of a single forward pass through a Conv2d layer.

    Args:
        mod: torch.nn.Conv2d; its weight has shape (out_c, in_c // groups, kh, kw).
        input_shape: torch.Size (N, C_in, H_in, W_in) of the layer input.
        output_shape: torch.Size (N, C_out, H_out, W_out) of the layer output.
        macs: if True return multiply-accumulate count (MACs); otherwise
              FLOPs, i.e. 2 * MACs plus one add per output element for bias.

    Returns:
        int: operation count for one sample.
    """
    _, cin, _, _ = input_shape
    _, _, h, w = output_shape
    w_cout, w_cin, w_h, w_w = mod.weight.data.shape
    # Each output channel convolves with in_c // groups input channels,
    # which is exactly w_cin -- this holds for groups == 1 as well.
    # (The previous code used 1 whenever groups != 1, which is only right
    # for depthwise convolution where groups == in_c.)
    assert cin == w_cin * mod.groups
    input_channels = w_cin
    output_channels = w_cout
    flops = h * w * output_channels * input_channels * w_h * w_w
    if not macs:
        # One multiply + one add per MAC; bias adds one op per output element.
        flops_bias = output_shape[1:].numel() if mod.bias is not None else 0
        flops = 2 * flops + flops_bias
    return int(flops)
2. fc 计算
def compute_fc_flops(mod, input_shape=None, output_shape=None, macs=False):
    """Count the operations of a single forward pass through a Linear layer.

    Args:
        mod: torch.nn.Linear; its weight has shape (out_features, in_features).
        input_shape, output_shape: unused, kept for a uniform compute_* API.
        macs: if True return MACs; otherwise FLOPs (2 * MACs + bias adds).

    Returns:
        int: operation count for one sample.
    """
    # nn.Linear stores weight as (out_features, in_features); unpack in that
    # order.  (The previous code unpacked (in, out) and therefore counted
    # in_features bias additions instead of out_features.)
    ft_out, ft_in = mod.weight.data.shape
    flops = ft_in * ft_out  # one MAC per weight
    if not macs:
        flops_bias = ft_out if mod.bias is not None else 0  # one add per output
        flops = 2 * flops + flops_bias
    return int(flops)
def compute_bn2d_flops(mod, input_shape=None, output_shape=None, macs=False):
    """Count operations of a BatchNorm2d layer.

    Per element: subtract + divide for normalisation; in full-FLOP mode the
    affine transform (gamma, beta) doubles the count.
    """
    elements = input_shape[1:].numel()
    ops = elements * 2  # subtract, divide
    if not macs:
        ops *= 2  # gamma scale, beta shift
    return int(ops)
3. relu 计算
def compute_relu_flops(mod, input_shape=None, output_shape=None, macs=False):
    """Count operations of a ReLU: one op per element, zero in MAC-only mode."""
    if macs:
        return 0
    return int(input_shape[1:].numel())
4. maxpool 计算
def compute_maxpool2d_flops(mod, input_shape=None, output_shape=None, macs=False):
    """Count operations of a MaxPool2d: kernel_size**2 comparisons per output
    element; zero in MAC-only mode.

    NOTE(review): assumes mod.kernel_size is an int (square window) --
    confirm callers never pass a tuple kernel.
    """
    if macs:
        return 0
    window_ops = mod.kernel_size ** 2
    return window_ops * output_shape[1:].numel()
5. averagepool 计算
def compute_avgpool2d_flops(mod, input_shape=None, output_shape=None, macs=False):
    """Count operations of an AvgPool2d: kernel_size**2 adds per output
    element; zero in MAC-only mode.

    NOTE(review): assumes mod.kernel_size is an int (square window) --
    confirm callers never pass a tuple kernel.
    """
    if macs:
        return 0
    window_ops = mod.kernel_size ** 2
    return window_ops * output_shape[1:].numel()
6. softmax 计算
def compute_softmax_flops(mod, input_shape=None, output_shape=None, macs=False):
    """Count operations of a softmax over the non-batch dimensions.

    One exp and one divide per feature; the (n - 1) additions of the
    normalising sum are only included in full-FLOP mode.  exp is treated as
    a single flop (see stackoverflow.com/questions/3979942 on exp cost).
    """
    n = input_shape[1:].numel()
    exp_ops = n
    div_ops = n
    sum_ops = n - 1
    total = exp_ops + div_ops
    if not macs:
        total += sum_ops
    return total
另一种计算方法–感觉更合理
'''
A simplified 3-D Tensor (channels, height, weight) for convolutional neural networks.
'''
class Tensor(object):
    """A simplified 3-D tensor descriptor (channels, height, width) for CNNs."""

    def __init__(self, c, h, w):
        self.c = c  # channels
        self.h = h  # height
        self.w = w  # width

    def equals(self, other):
        """Return True iff all three dimensions match exactly."""
        return (self.c, self.h, self.w) == (other.c, other.h, other.w)

    def broadcastable(self, other):
        """Return True iff, for every dimension, one size divides the other."""
        for mine, theirs in ((self.c, other.c), (self.h, other.h), (self.w, other.w)):
            if mine % theirs != 0 and theirs % mine != 0:
                return False
        return True
'''
Calculate the single-sample inference-time params and FLOPs of a convolutional
neural network with PyTorch-like APIs.
To calculate the params and FLOPs of certain network architecture, CNNCalculator
needs to be inherited and the network needs to be defined as in PyTorch.
For convenience, some basic operators are pre-defined and other modules can be
defined in a similar way. Parameters and FLOPs in Batch Normalization and other
types of layers are also computed. If only Convolutional and Linear layers are
considered, please modify the code.
Refer to `MobileNet.py` for details.
'''
class CNNCalculator(object):
    """Single-sample inference-time params/FLOPs calculator with
    PyTorch-like layer APIs.

    Inherit from this class and define the network inside `calculate`
    (mirroring a PyTorch `forward`); each layer method accumulates into
    `self.params` / `self.flops` and returns the output Tensor shape.
    With only_mac=True only multiply-accumulate work is counted, so
    element-wise layers (ReLU, Sigmoid, pooling, adds) contribute nothing.
    """

    def __init__(self, only_mac=False):
        self.params = 0    # total learnable parameter count
        self.flops = 0     # total operation count (FLOPs, or MACs if only_mac)
        self.only_mac = only_mac

    def calculate(self, *inputs):
        """Subclasses implement the network architecture here."""
        raise NotImplementedError

    def Conv2d(self, tensor, out_c, size, stride=1, padding=0, groups=1, bias=True, name='conv'):
        """2-D convolution; size/stride/padding accept an int or an (h, w) tuple."""
        if type(size) == int:
            size = (size, size)
        if type(stride) == int:
            stride = (stride, stride)
        if type(padding) == int:
            padding = (padding, padding)
        assert type(size) == tuple and len(size) == 2, 'illegal size parameters'
        assert type(stride) == tuple and len(stride) == 2, 'illegal stride parameters'
        assert type(padding) == tuple and len(padding) == 2, 'illegal padding parameters'
        size_h, size_w = size
        stride_h, stride_w = stride
        padding_h, padding_w = padding
        in_c = tensor.c
        out_h = (tensor.h - size_h + 2 * padding_h) // stride_h + 1
        out_w = (tensor.w - size_w + 2 * padding_w) // stride_w + 1
        assert in_c % groups == 0 and out_c % groups == 0, 'in_c and out_c must be divisible by groups'
        # Each output channel convolves with in_c // groups input channels.
        self.params += out_c * in_c // groups * size_h * size_w
        self.flops += out_c * out_h * out_w * in_c // groups * size_h * size_w
        if bias:
            self.params += out_c
            self.flops += out_c * out_w * out_h  # one add per output element
        return Tensor(out_c, out_h, out_w)

    def BatchNorm2d(self, tensor, name='batch_norm'):
        # At inference time batch normalization folds into the preceding
        # convolution, so neither params nor FLOPs are counted here.
        # (To count it explicitly: params += 4 * c, flops += c * h * w.)
        return tensor

    def ReLU(self, tensor, name='relu'):
        """Element-wise ReLU: one op per element, skipped in MAC-only mode."""
        out_c = tensor.c
        out_h = tensor.h
        out_w = tensor.w
        if not self.only_mac:
            self.flops += out_c * out_h * out_w
        return Tensor(out_c, out_h, out_w)

    def Sigmoid(self, tensor, name='sigmoid'):
        """Element-wise sigmoid: one op per element, skipped in MAC-only mode.

        Fixed: the default `name` was 'relu' (copy-paste slip); the value is
        informational only, so behavior is unchanged.
        """
        out_c = tensor.c
        out_h = tensor.h
        out_w = tensor.w
        if not self.only_mac:
            self.flops += out_c * out_h * out_w
        return Tensor(out_c, out_h, out_w)

    def Pool2d(self, tensor, size, stride=1, padding=0, name='pool'):
        """Generic 2-D pooling: size_h * size_w ops per output element; no params."""
        if type(size) == int:
            size = (size, size)
        if type(stride) == int:
            stride = (stride, stride)
        if type(padding) == int:
            padding = (padding, padding)
        assert type(size) == tuple and len(size) == 2, 'illegal size parameters'
        assert type(stride) == tuple and len(stride) == 2, 'illegal stride parameters'
        assert type(padding) == tuple and len(padding) == 2, 'illegal padding parameters'
        size_h, size_w = size
        stride_h, stride_w = stride
        padding_h, padding_w = padding
        out_c = tensor.c
        out_h = (tensor.h - size_h + 2 * padding_h) // stride_h + 1
        out_w = (tensor.w - size_w + 2 * padding_w) // stride_w + 1
        if not self.only_mac:
            self.flops += out_c * out_h * out_w * size_h * size_w
        return Tensor(out_c, out_h, out_w)

    def AvgPool2d(self, tensor, size, stride=1, padding=0, name='avg_pool'):
        return self.Pool2d(tensor, size, stride=stride, padding=padding, name=name)

    def MaxPool2d(self, tensor, size, stride=1, padding=0, name='max_pool'):
        return self.Pool2d(tensor, size, stride=stride, padding=padding, name=name)

    def GlobalAvgPool2d(self, tensor, name='global_avg_pool'):
        """Average-pool the full spatial extent down to 1x1."""
        size = (tensor.h, tensor.w)
        return self.AvgPool2d(tensor, size)

    def GlobalMaxPool2d(self, tensor, name='global_max_pool'):
        """Max-pool the full spatial extent down to 1x1."""
        size = (tensor.h, tensor.w)
        return self.MaxPool2d(tensor, size)

    def Linear(self, tensor, out_c, name='fully_connected'):
        """Fully-connected layer; requires a 1x1 spatial input.

        NOTE(review): bias params/FLOPs are not counted here -- confirm this
        matches the intended convention before relying on exact totals.
        """
        in_c = tensor.c
        out_h = tensor.h
        out_w = tensor.w
        assert out_h == 1 and out_w == 1, 'out_h or out_w is greater than 1 in Linear layer.'
        self.params += in_c * out_c
        self.flops += in_c * out_c
        return Tensor(out_c, out_h, out_w)

    def Concat(self, tensors, name='concat'):
        """Channel-wise concatenation; spatial sizes must agree. No FLOPs."""
        out_c = 0
        out_h = tensors[0].h
        out_w = tensors[0].w
        for tensor in tensors:
            assert tensor.h == out_h and tensor.w == out_w, 'tensor dimensions mismatch in Concat layer.'
            out_c += tensor.c
        return Tensor(out_c, out_h, out_w)

    def MultiAdd(self, tensor, other, name='multi_add'):
        """Element-wise binary op (add or multiply): one op per element."""
        assert tensor.broadcastable(other), 'tensor dimensions mismatch in Add layer.'
        out_c = tensor.c
        out_h = tensor.h
        out_w = tensor.w
        if not self.only_mac:
            self.flops += out_c * out_h * out_w
        return Tensor(out_c, out_h, out_w)

    def Add(self, tensor, other, name='add'):
        return self.MultiAdd(tensor, other, name=name)

    def Multi(self, tensor, other, name='multi'):
        return self.MultiAdd(tensor, other, name=name)

    def SplitBySize(self, tensor, sizes, name='split_by_size'):
        """Split the channel dimension into chunks of the given sizes. No FLOPs."""
        assert sum(sizes) == tensor.c, 'sizes and tensor.c do not match.'
        return [Tensor(c, tensor.h, tensor.w) for c in sizes]
转载请注明出处:https://blog.csdn.net/tbl1234567.作者:陶表犁