Understanding and Computing FLOPS in CNNs

Related concepts

FLOPS: note the all-uppercase spelling. It is short for floating point operations per second, i.e. the number of floating-point operations executed per second, and can be read as compute speed. It is a metric of hardware performance.

FLOPs: note the lowercase s. It is short for floating point operations (the s marks the plural), i.e. the number of floating-point operations, and can be read as compute cost. It can be used to measure the complexity of an algorithm/model.

MACs: multiply-accumulate operations (Multiplication and Accumulation). One MAC is equivalent to 2 floating-point operations; hardware that supports multiply-add instructions can speed up the computation.
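As a quick worked example of how these quantities relate (the layer sizes below are made up purely for illustration): a 3x3 convolution with 64 input channels and 128 output channels producing a 56x56 feature map performs about 2.3e8 multiply-accumulates, which counts as roughly twice as many FLOPs.

# Illustrative arithmetic only; the layer sizes are arbitrary example values.
c_in, c_out, k, h_out, w_out = 64, 128, 3, 56, 56
macs = c_in * c_out * k * k * h_out * w_out   # 231,211,008 multiply-accumulates
flops = 2 * macs                              # ~0.46 GFLOPs, ignoring any bias term
print(macs, flops)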

Computing the op counts
1. conv calculation
def compute_conv2d_flops(mod, input_shape=None, output_shape=None, macs=False):
    # input_shape / output_shape are torch.Size objects of the form (N, C, H, W)
    _, cin, _, _ = input_shape
    _, _, h, w = output_shape

    # PyTorch stores conv weights as (out_channels, in_channels // groups, kH, kW)
    w_cout, w_cin, w_h, w_w = mod.weight.data.shape

    if mod.groups == 1:
        assert cin == w_cin
    # w_cin already equals in_channels // groups, i.e. the fan-in seen by each output channel,
    # so it also covers depthwise and grouped convolutions
    input_channels = w_cin
    output_channels = w_cout

    # one multiply-accumulate per kernel element, per (grouped) input channel, per output position;
    # stride is already accounted for because h and w are the output spatial dimensions
    flops = h * w * output_channels * input_channels * w_h * w_w

    if not macs:
        # count multiplications and additions separately, plus one addition per output element for the bias
        flops_bias = output_shape[1:].numel() if mod.bias is not None else 0
        flops = 2 * flops + flops_bias

    return int(flops)
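A minimal usage sketch for this counter (assuming PyTorch is installed; the module and shapes below are arbitrary example values, not from the original post):

import torch
import torch.nn as nn

conv = nn.Conv2d(3, 16, kernel_size=3, stride=1, padding=1, bias=True)
x = torch.randn(1, 3, 32, 32)
y = conv(x)
# 32*32*16*3*3*3 = 442,368 MACs; 2*442,368 + 16*32*32 = 901,120 FLOPs
print(compute_conv2d_flops(conv, x.shape, y.shape, macs=True))
print(compute_conv2d_flops(conv, x.shape, y.shape, macs=False))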
2. fc calculation
def compute_fc_flops(mod, input_shape=None, output_shape=None, macs=False):
    # PyTorch stores linear weights as (out_features, in_features)
    ft_out, ft_in = mod.weight.data.shape
    # one multiply-accumulate per weight entry
    flops = ft_in * ft_out

    if not macs:
        # multiplications and additions counted separately, plus one addition per output feature for the bias
        flops_bias = ft_out if mod.bias is not None else 0
        flops = 2 * flops + flops_bias

    return int(flops)

def compute_bn2d_flops(mod, input_shape=None, output_shape=None, macs=False):
    # normalization (subtract mean, divide by std) and the affine transform (gamma, beta)
    # each cost one op per element; the (subtract, divide) and (gamma, beta) pairs are
    # counted as multiply-accumulates here
    flops = 2 * input_shape[1:].numel()

    if not macs:
        # subtract, divide, gamma, beta counted as four separate ops per element
        flops *= 2

    return int(flops)
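A usage sketch for the fc and bn counters (PyTorch assumed; the layer sizes are arbitrary example values):

import torch
import torch.nn as nn

fc = nn.Linear(512, 10)
# 2 * 512 * 10 + 10 = 10,250 FLOPs (5,120 MACs)
print(compute_fc_flops(fc, torch.Size([1, 512]), torch.Size([1, 10])))

bn = nn.BatchNorm2d(16)
# 4 * 16 * 32 * 32 = 65,536 FLOPs (32,768 counted as MACs)
print(compute_bn2d_flops(bn, torch.Size([1, 16, 32, 32]), torch.Size([1, 16, 32, 32])))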
3. relu calculation
def compute_relu_flops(mod, input_shape=None, output_shape=None, macs=False):
    # ReLU is one comparison per element and contains no multiply-accumulates
    flops = 0
    if not macs:
        flops = input_shape[1:].numel()

    return int(flops)
4. maxpool calculation
def compute_maxpool2d_flops(mod, input_shape=None, output_shape=None, macs=False):
    # one comparison per kernel element per output element; no multiply-accumulates
    flops = 0
    if not macs:
        flops = mod.kernel_size ** 2 * output_shape[1:].numel()

    return int(flops)
5. averagepool calculation
def compute_avgpool2d_flops(mod, input_shape=None, output_shape=None, macs=False):
    # roughly k*k ops per output element: (k*k - 1) additions plus one division
    flops = 0
    if not macs:
        flops = mod.kernel_size ** 2 * output_shape[1:].numel()

    return int(flops)
6. softmax calculation
def compute_softmax_flops(mod, input_shape=None, output_shape=None, macs=False):
    nfeatures = input_shape[1:].numel()

    # treat each exp as a single op, see
    # https://stackoverflow.com/questions/3979942/what-is-the-complexity-real-cost-of-exp-in-cmath-compared-to-a-flop
    total_exp = nfeatures
    total_add = nfeatures - 1   # summing the exponentials
    total_div = nfeatures       # normalizing every entry

    flops = total_div + total_exp

    if not macs:
        flops += total_add

    return int(flops)
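The counters above each handle a single module; in practice they have to be driven over a whole network. The sketch below shows one possible way to do that with PyTorch forward hooks; the count_flops helper and its dispatch table are my own illustrative assumption, not part of the original post.

import torch
import torch.nn as nn

def count_flops(model, input_size=(1, 3, 32, 32), macs=False):
    # map each supported module type to the matching per-layer counter defined above
    dispatch = {
        nn.Conv2d: compute_conv2d_flops,
        nn.Linear: compute_fc_flops,
        nn.BatchNorm2d: compute_bn2d_flops,
        nn.ReLU: compute_relu_flops,
        nn.MaxPool2d: compute_maxpool2d_flops,
        nn.AvgPool2d: compute_avgpool2d_flops,
    }
    total = [0]
    hooks = []

    def make_hook(fn):
        def hook(mod, inputs, output):
            # inputs is a tuple of the module's positional inputs
            total[0] += fn(mod, inputs[0].shape, output.shape, macs=macs)
        return hook

    for m in model.modules():
        fn = dispatch.get(type(m))
        if fn is not None:
            hooks.append(m.register_forward_hook(make_hook(fn)))

    # one dummy forward pass triggers every hook once
    with torch.no_grad():
        model(torch.randn(*input_size))

    for h in hooks:
        h.remove()
    return total[0]

Called on a small nn.Sequential model, this simply returns the sum of the per-layer counts above for one forward pass.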
Another calculation method (which I find more reasonable)
'''
A simplified 3-D Tensor (channels, height, width) for convolutional neural networks.
'''
class Tensor(object):
    def __init__(self, c, h, w):
        self.c = c
        self.h = h
        self.w = w


    def equals(self, other):
        return self.c == other.c and self.h == other.h and self.w == other.w


    def broadcastable(self, other):
        return (self.c % other.c == 0 or other.c % self.c == 0) and \
               (self.h % other.h == 0 or other.h % self.h == 0) and \
               (self.w % other.w == 0 or other.w % self.w == 0)


'''
Calculate the single-sample inference-time params and FLOPs of a convolutional
neural network with PyTorch-like APIs.
To calculate the params and FLOPs of a certain network architecture, CNNCalculator
needs to be inherited and the network needs to be defined as in PyTorch.
For convenience, some basic operators are pre-defined and other modules can be
defined in a similar way. Parameters and FLOPs in Batch Normalization and other
types of layers are also computed. If only Convolutional and Linear layers are
considered, please modify the code.
Refer to `MobileNet.py` for details.
'''
class CNNCalculator(object):
    def __init__(self, only_mac=False):
        self.params = 0
        self.flops = 0
        self.only_mac = only_mac


    def calculate(self, *inputs):
        raise NotImplementedError


    def Conv2d(self, tensor, out_c, size, stride=1, padding=0, groups=1, bias=True, name='conv'):
        if type(size) == int:
            size = (size, size)
        if type(stride) == int:
            stride = (stride, stride)
        if type(padding) == int:
            padding = (padding, padding)
        assert type(size) == tuple and len(size) == 2, 'illegal size parameters'
        assert type(stride) == tuple and len(stride) == 2, 'illegal stride parameters'
        assert type(padding) == tuple and len(padding) == 2, 'illegal padding parameters'
        size_h, size_w = size
        stride_h, stride_w = stride
        padding_h, padding_w = padding

        in_c = tensor.c
        out_h = (tensor.h - size_h + 2 * padding_h) // stride_h + 1
        out_w = (tensor.w - size_w + 2 * padding_w) // stride_w + 1
        assert in_c % groups == 0 and out_c % groups == 0, 'in_c and out_c must be divisible by groups'

        self.params += out_c * in_c // groups * size_h * size_w
        self.flops += out_c * out_h * out_w * in_c // groups * size_h * size_w
        if bias:
            self.params += out_c
            self.flops += out_c * out_w * out_h

        return Tensor(out_c, out_h, out_w)


    def BatchNorm2d(self, tensor, name='batch_norm'):
        return tensor
        # Batch normalization can be combined with the preceding convolution, so there are no FLOPs.
        # out_c = tensor.c
        # out_h = tensor.h
        # out_w = tensor.w

        # if self.only_mac:
            # self.params += 4 * out_c
            # self.flops += out_c * out_h * out_w
        # return Tensor(out_c, out_h, out_w)


    def ReLU(self, tensor, name='relu'):
        out_c = tensor.c
        out_h = tensor.h
        out_w = tensor.w

        if not self.only_mac:
            self.flops += out_c * out_h * out_w
        return Tensor(out_c, out_h, out_w)


    def Sigmoid(self, tensor, name='sigmoid'):
        out_c = tensor.c
        out_h = tensor.h
        out_w = tensor.w

        if not self.only_mac:
            self.flops += out_c * out_h * out_w
        return Tensor(out_c, out_h, out_w)


    def Pool2d(self, tensor, size, stride=1, padding=0, name='pool'):
        if type(size) == int:
            size = (size, size)
        if type(stride) == int:
            stride = (stride, stride)
        if type(padding) == int:
            padding = (padding, padding)
        assert type(size) == tuple and len(size) == 2, 'illegal size parameters'
        assert type(stride) == tuple and len(stride) == 2, 'illegal stride parameters'
        assert type(padding) == tuple and len(padding) == 2, 'illegal padding parameters'
        size_h, size_w = size
        stride_h, stride_w = stride
        padding_h, padding_w = padding

        out_c = tensor.c
        out_h = (tensor.h - size_h + 2 * padding_h) // stride_h + 1
        out_w = (tensor.w - size_w + 2 * padding_w) // stride_w + 1
        if not self.only_mac:
            self.flops += out_c * out_h * out_w * size_h * size_w
        return Tensor(out_c, out_h, out_w)


    def AvgPool2d(self, tensor, size, stride=1, padding=0, name='avg_pool'):
        return self.Pool2d(tensor, size, stride=stride, padding=padding, name=name)


    def MaxPool2d(self, tensor, size, stride=1, padding=0, name='max_pool'):
        return self.Pool2d(tensor, size, stride=stride, padding=padding, name=name)


    def GlobalAvgPool2d(self, tensor, name='global_avg_pool'):
        size = (tensor.h, tensor.w)
        return self.AvgPool2d(tensor, size)


    def GlobalMaxPool2d(self, tensor, name='global_max_pool'):
        size = (tensor.h, tensor.w)
        return self.MaxPool2d(tensor, size)


    def Linear(self, tensor, out_c, name='fully_connected'):
        in_c = tensor.c
        out_h = tensor.h
        out_w = tensor.w
        assert out_h == 1 and out_w == 1, 'out_h or out_w is greater than 1 in Linear layer.'
        self.params += in_c * out_c
        self.flops += in_c * out_c
        return Tensor(out_c, out_h, out_w)


    def Concat(self, tensors, name='concat'):
        out_c = 0
        out_h = tensors[0].h
        out_w = tensors[0].w
        for tensor in tensors:
            assert tensor.h == out_h and tensor.w == out_w, 'tensor dimensions mismatch in Concat layer.'
            out_c += tensor.c
        return Tensor(out_c, out_h, out_w)


    def MultiAdd(self, tensor, other, name='multi_add'):
        assert tensor.broadcastable(other), 'tensor dimensions mismatch in Add layer.'
        out_c = tensor.c
        out_h = tensor.h
        out_w = tensor.w
        if not self.only_mac:
            self.flops += out_c * out_h * out_w
        return Tensor(out_c, out_h, out_w)


    def Add(self, tensor, other, name='add'):
        return self.MultiAdd(tensor, other, name=name)


    def Multi(self, tensor, other, name='multi'):
        return self.MultiAdd(tensor, other, name=name)


    def SplitBySize(self, tensor, sizes, name='split_by_size'):
        assert sum(sizes) == tensor.c, 'sizes and tensor.c do not match.'
        return [Tensor(c, tensor.h, tensor.w) for c in sizes]
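The MobileNet.py referenced in the docstring is not reproduced in this post; as a hypothetical illustration of how the calculator is meant to be subclassed, a minimal example might look like the following (the architecture and layer sizes are arbitrary):

class TinyNetCalculator(CNNCalculator):
    def calculate(self, x):
        # define the network as in PyTorch, but on symbolic Tensor shapes
        x = self.Conv2d(x, out_c=16, size=3, stride=1, padding=1, bias=False)
        x = self.BatchNorm2d(x)
        x = self.ReLU(x)
        x = self.MaxPool2d(x, size=2, stride=2)
        x = self.GlobalAvgPool2d(x)
        x = self.Linear(x, out_c=10)
        return x

calc = TinyNetCalculator()
calc.calculate(Tensor(3, 32, 32))          # single-sample 3x32x32 input
print('params:', calc.params, 'flops:', calc.flops)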

Please credit the source when reposting: https://blog.csdn.net/tbl1234567. Author: 陶表犁
