caffe-python

# -*- coding: utf-8 -*-
from __future__ import print_function

import os

import caffe
from caffe import layers as L
from caffe import params as P

from caffe.proto import caffe_pb2
from caffe import to_proto
from google.protobuf import text_format

import common
import efficientnet

# Increase the recursion limit (building the deeply nested layer graph recurses heavily)
import sys
sys.setrecursionlimit(3000)

path='./new_txt/'
train_list='./new_txt/train_list.txt'
val_list='./new_txt/test_list.txt'               


def create_net(train_list,batch_size,include_acc=False):
    '''
    NetSpec can be used for naming: the identifier after each `spec.` becomes that layer's
    name. Layers created without a spec get auto-generated names, which is what happens
    inside the helper functions here, since the spec cannot be passed into them.
    spec.data, spec.label = L.ImageData(source=train_list, batch_size=batch_size, shuffle=True, ntop=2,
                                        transform_param=dict(crop_size=112, mirror=False, scale=0.0078125, mean_value=127.5),
                                        phase=0) '''
    data, label=L.ImageData(source=train_list,batch_size=batch_size,shuffle=True,ntop=2,
                           transform_param=dict(crop_size=112,mirror=False,scale=0.0078125,mean_value=127.5),include=dict(phase=caffe.TRAIN))
    conv = efficientnet.efficientnet_b7(data)
    prob = L.Softmax(conv)
    net_param = caffe.to_proto(prob)
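    # caffe.to_proto(prob) serializes every layer reachable from `prob`, including the
    # ImageData layer; drop that first layer and declare a plain 'data' input instead,
    # which turns the generated net into a deploy-style prototxt.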
    del net_param.layer[0]
    net_param.name = 'yangninghua_deploy'
    net_param.input.extend(['data'])
    net_param.input_shape.extend([
        caffe_pb2.BlobShape(dim=[1, 3, 224, 224])])
    return net_param


def write_net():
    with open('./new_txt/deploy_efficientnet_b7.txt', 'w') as f:
        f.write(str(create_net(train_list,batch_size=96)))

if __name__ == '__main__':
    write_net()
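
# A quick sanity check (sketch): parse the generated text back into a NetParameter
# to confirm it is valid prototxt. Only modules already imported above are used, and
# the path matches write_net(); adjust it if you rename the output file.
#
#   net_param = caffe_pb2.NetParameter()
#   with open('./new_txt/deploy_efficientnet_b7.txt') as f:
#       text_format.Merge(f.read(), net_param)
#   print('parsed', len(net_param.layer), 'layers')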
# -*- coding: utf-8 -*-
from __future__ import print_function
"""
    Common routines for models in Caffe.
"""

__all__ = ['BatchNorm_Scale', 'Swish', 'HSigmoid', 'HSwish', 'get_activation_layer', 'conv1x1', 'conv3x3', 'depthwise_conv3x3', 'ConvBlock', 'conv1x1_block',
           'conv3x3_block', 'conv5x5_block', 'conv7x7_block', 'dwconv3x3_block', 'dwconv5x5_block', 'PreConvBlock', 'pre_conv1x1_block',
           'pre_conv3x3_block', 'channel_shuffle', 'ChannelShuffle', 'channel_shuffle2', 'ChannelShuffle2', 'SEBlock']
# 

import os
import math
from inspect import isfunction

import caffe
from caffe import layers as L
from caffe import params as P

from caffe.proto import caffe_pb2
from caffe import to_proto
from google.protobuf import text_format


# PyTorch -> Caffe parameter name mapping used throughout this file:
# out_channels -> num_output
# padding      -> pad
# groups       -> group

def BatchNorm_Scale(input):
    # In-place computation in Caffe saves (GPU) memory and avoids repeatedly allocating
    # and freeing buffers. Layers known to support in-place operation: ReLU, Dropout,
    # BatchNorm, Scale.
    bn = L.BatchNorm(input, use_global_stats=False,in_place=True)
    #scale = L.Scale(bn,filler=dict(value=1),bias_filler=dict(value=0),bias_term=True, in_place=True)
    scale = L.Scale(bn,bias_term=True, in_place=True)
    return scale
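
# Note: the bn_eps values threaded through the blocks below are never forwarded here.
# Caffe's BatchNorm layer does accept an eps field, so a variant that honours it could
# look like this (a sketch; it assumes you also extend the callers to pass bn_eps):
#
#   def BatchNorm_Scale(input, bn_eps=1e-5):
#       bn = L.BatchNorm(input, use_global_stats=False, eps=bn_eps, in_place=True)
#       scale = L.Scale(bn, bias_term=True, in_place=True)
#       return scale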


def Swish(input):
    """
    Swish activation function from 'Searching for Activation Functions,' https://arxiv.org/abs/1710.05941.

    x * sigmoid(x)
    """
    #operation=0 PROD
    #operation=1 SUM
    #operation=2 MAX
    sigmoid = L.Sigmoid(input)
    return L.Eltwise(
        input, 
        sigmoid, 
        eltwise_param=dict(operation=0))


def HSigmoid(input):
    """
    Approximated sigmoid function, so-called hard-version of sigmoid from 'Searching for MobileNetV3,'
    https://arxiv.org/abs/1905.02244.

    relu6(x + 3.0) / 6.0
    """
    power = L.Power(input, power=1, scale=1,shift=3)
    # Leaky ReLU: f(x) = max(x, 0) + alpha * min(x, 0)
    # ReLU6 exposes a negative_slope parameter (the alpha above), default 0
    relu6 = L.ReLU6(power)
    return L.Power(
        relu6,
        power=1,
        scale=0.1666666667,
        shift=0)


def HSwish(input):
    """
    H-Swish activation function from 'Searching for MobileNetV3,' https://arxiv.org/abs/1905.02244.

    x * relu6(x + 3.0) / 6.0
    Parameters:
    ----------

    """
    power1 = L.Power(input, power=1, scale=1,shift=3)
    # Leaky ReLU: f(x) = max(x, 0) + alpha * min(x, 0)
    # ReLU6 exposes a negative_slope parameter (the alpha above), default 0
    #operation=0 PROD
    #operation=1 SUM
    #operation=2 MAX
    relu6 = L.ReLU6(power1)
    power2 = L.Power(relu6, power=1, scale=0.1666666667,shift=0)
    return L.Eltwise(
        input, 
        power2, 
        eltwise_param=dict(operation=0))


def get_activation_layer(input,activation):
    """
    Create activation layer from string/function.

    Parameters:
    ----------
    activation : function or str
        Activation function or name of activation function.

    Returns
    -------
    Caffe layer top
        Activation layer.
    """
    assert (activation is not None)
    if isinstance(activation, str):
        if activation == "relu":
            return L.ReLU(input, in_place=True)
        elif activation == "relu6":
            return L.ReLU6(input)
        elif activation == "swish":
            return Swish(input)
        elif activation == "hswish":
            return HSwish(input)
        else:
            raise NotImplementedError()
    else:
        raise NotImplementedError()


def conv1x1(input,
            out_channels,
            stride=1,
            groups=1,
            bias=False):
    """
    Convolution 1x1 layer.

    Parameters:
    ----------
    out_channels : int
        Number of output channels.
    stride : int or tuple/list of 2 int, default 1
        Strides of the convolution.
    groups : int, default 1
        Number of groups.
    bias : bool, default False
        Whether the layer uses a bias vector.
    """
    # Explicit weight initialization is needed; Caffe's filler defaults to constant 0.
    # Test code:
    # conv = conv1x1(input=spec.data,out_channels=500)
    # conv = conv1x1(input=conv,out_channels=500,bias=True)
    if bias:
        return L.Convolution(
            input,
            kernel_size=1,
            stride=stride,
            num_output=out_channels,
            group=groups,
            bias_term=True,
            weight_filler=dict(type='xavier'),
            bias_filler=dict(type='constant', value=0))
    else:
        return L.Convolution(
            input,
            kernel_size=1,
            stride=stride,
            num_output=out_channels,
            group=groups,
            bias_term=False,
            weight_filler=dict(type='xavier'))

def conv3x3(input,
            out_channels,
            stride=1,
            padding=1,
            dilation=1,
            groups=1,
            bias=False):
    """
    Convolution 3x3 layer.

    Parameters:
    ----------
    out_channels : int
        Number of output channels.
    stride : int or tuple/list of 2 int, default 1
        Strides of the convolution.
    padding : int or tuple/list of 2 int, default 1
        Padding value for convolution layer.
    groups : int, default 1
        Number of groups.
    bias : bool, default False
        Whether the layer uses a bias vector.
    """
    # Explicit weight initialization is needed; Caffe's filler defaults to constant 0.
    # Test code:
    # conv = conv3x3(input=spec.data,out_channels=500)
    # conv = conv3x3(input=conv,out_channels=500,bias=True)
    if bias:
        return L.Convolution(
            input,
            kernel_size=3,
            stride=stride,
            num_output=out_channels,
            group=groups,
            bias_term=True,
            pad=padding,
            dilation=dilation,
            weight_filler=dict(type='xavier'),
            bias_filler=dict(type='constant', value=0))
    else:
        return L.Convolution(
            input,
            kernel_size=3,
            stride=stride,
            num_output=out_channels,
            group=groups,
            pad=padding,
            dilation=dilation,
            bias_term=False,
            weight_filler=dict(type='xavier'))


def depthwise_conv3x3(input,
                      channels,
                      stride,
                      bias,
                      padding=1):
    """
    Depthwise convolution 3x3 layer.

    Parameters:
    ----------
    channels : int
        Number of input/output channels.
    stride : int or tuple/list of 2 int
        Strides of the convolution.
    """
    # ConvolutionDepthwise is a custom layer, not part of stock Caffe. Sources:
    # ConvolutionDepthwiseLayer / ConvolutionDepthwise (conv_dw_layer.hpp)
    # https://blog.csdn.net/blogshinelee/article/details/86094419
    # https://github.com/farmingyard/caffe-mobilenet
    # https://github.com/GeekLee95/MobileNet_caffe
    # https://github.com/anlongstory/ShuffleNet_V2-caffe
    # engine: CAFFE

    # Alternative: DepthwiseConvolutionLayer / DepthwiseConvolution (depthwise_conv_layer.hpp)
    # https://github.com/yonghenglh6/DepthwiseConvolution
    # https://blog.csdn.net/nus_cs/article/details/78126804

    # One motivation: stock Caffe's grouped convolution has rather poor memory management,
    # hence these dedicated depthwise layers.

    # L.ConvolutionDepthwise
    # L.DepthwiseConvolution
    if bias:
        return L.ConvolutionDepthwise(
            input,
            kernel_size=3,
            stride=stride,
            num_output=channels,
            #engine=1,
            pad=padding,
            bias_term=True,
            weight_filler=dict(type='xavier'),
            bias_filler=dict(type='constant', value=0))
    else:
        return L.ConvolutionDepthwise(
            input,
            kernel_size=3,
            stride=stride,
            num_output=channels,
            #engine=1,
            pad=padding,
            bias_term=False,
            weight_filler=dict(type='xavier'))


def ConvBlock(input,
              out_channels,
              kernel_size,
              stride,
              padding,
              dilation=1,
              groups=1,
              bias=False,
              bn_eps=1e-5,
              activation=(lambda: "relu")):
    """
    Standard convolution block with Batch normalization and activation.

    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    kernel_size : int or tuple/list of 2 int
        Convolution window size.
    stride : int or tuple/list of 2 int
        Strides of the convolution.
    padding : int or tuple/list of 2 int
        Padding value for convolution layer.
    dilation : int or tuple/list of 2 int, default 1
        Dilation value for convolution layer.
    groups : int, default 1
        Number of groups.
    bias : bool, default False
        Whether the layer uses a bias vector.
    activation : function or str or None, default (lambda: "relu")
        Activation function or name of activation function; None skips the activation.
    """

    if bias:
        if groups == out_channels:
            conv = L.ConvolutionDepthwise(input,
                    kernel_size=kernel_size,
                    stride=stride,
                    num_output=out_channels,
                    #engine=1,
                    pad=padding,
                    dilation=dilation,
                    group=groups,
                    bias_term=True,
                    weight_filler=dict(type='xavier'),
                    bias_filler=dict(type='constant', value=0))
        else:
            conv = L.Convolution(input,
                                kernel_size=kernel_size,
                                stride=stride,
                                num_output=out_channels,
                                pad=padding,
                                dilation=dilation,
                                group=groups,
                                bias_term=True,
                                weight_filler=dict(type='xavier'),
                                bias_filler=dict(type='constant', value=0))
    else:
        if groups == out_channels:
            conv = L.ConvolutionDepthwise(input,
                                kernel_size=kernel_size,
                                stride=stride,
                                num_output=out_channels,
                                #engine=1,
                                pad=padding,
                                dilation=dilation,
                                bias_term=False,
                                weight_filler=dict(type='xavier'))
        else:
            conv = L.Convolution(input,
                    kernel_size=kernel_size,
                    stride=stride,
                    num_output=out_channels,
                    pad=padding,
                    dilation=dilation,
                    group=groups,
                    bias_term=False,
                    weight_filler=dict(type='xavier'))

    batchnorm = BatchNorm_Scale(conv)
    activate = (activation is not None)
    if activate:
        return get_activation_layer(batchnorm,activation)
    return batchnorm

def conv1x1_block(input,
                  out_channels,
                  stride=1,
                  padding=0,
                  groups=1,
                  bias=False,
                  bn_eps=1e-5,
                  activation=(lambda: "relu")):
    """
    1x1 version of the standard convolution block.

    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    stride : int or tuple/list of 2 int, default 1
        Strides of the convolution.
    padding : int or tuple/list of 2 int, default 0
        Padding value for convolution layer.
    groups : int, default 1
        Number of groups.
    bias : bool, default False
        Whether the layer uses a bias vector.
    activation : function or str or None, default nn.ReLU(inplace=True)
        Activation function or name of activation function.
    """
    return ConvBlock(
        input=input,
        out_channels=out_channels,
        kernel_size=1,
        stride=stride,
        padding=padding,
        groups=groups,
        bias=bias,
        bn_eps=bn_eps,
        activation=activation)


def conv3x3_block(input,
                  out_channels,
                  stride=1,
                  padding=1,
                  dilation=1,
                  groups=1,
                  bias=False,
                  bn_eps=1e-5,
                  activation=(lambda: "relu")):
    """
    3x3 version of the standard convolution block.

    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    stride : int or tuple/list of 2 int, default 1
        Strides of the convolution.
    padding : int or tuple/list of 2 int, default 1
        Padding value for convolution layer.
    dilation : int or tuple/list of 2 int, default 1
        Dilation value for convolution layer.
    groups : int, default 1
        Number of groups.
    bias : bool, default False
        Whether the layer uses a bias vector.
    activation : function or str or None, default nn.ReLU(inplace=True)
        Activation function or name of activation function.
    """
    return ConvBlock(
        input=input,
        out_channels=out_channels,
        kernel_size=3,
        stride=stride,
        padding=padding,
        dilation=dilation,
        groups=groups,
        bias=bias,
        bn_eps=bn_eps,
        activation=activation)


def conv5x5_block(input,
                  out_channels,
                  stride=1,
                  padding=2,
                  dilation=1,
                  groups=1,
                  bias=False,
                  bn_eps=1e-5,
                  activation=(lambda: "relu")):
    """
    5x5 version of the standard convolution block.

    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    stride : int or tuple/list of 2 int, default 1
        Strides of the convolution.
    padding : int or tuple/list of 2 int, default 2
        Padding value for convolution layer.
    dilation : int or tuple/list of 2 int, default 1
        Dilation value for convolution layer.
    groups : int, default 1
        Number of groups.
    bias : bool, default False
        Whether the layer uses a bias vector.
    activation : function or str or None, default nn.ReLU(inplace=True)
        Activation function or name of activation function.
    """
    return ConvBlock(
        input=input,
        out_channels=out_channels,
        kernel_size=5,
        stride=stride,
        padding=padding,
        dilation=dilation,
        groups=groups,
        bias=bias,
        bn_eps=bn_eps,
        activation=activation)


def conv7x7_block(input,
                  out_channels,
                  stride=1,
                  padding=3,
                  bias=False,
                  bn_eps=1e-5,
                  activation=(lambda: "relu")):
    """
    7x7 version of the standard convolution block.

    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    stride : int or tuple/list of 2 int, default 1
        Strides of the convolution.
    padding : int or tuple/list of 2 int, default 3
        Padding value for convolution layer.
    bias : bool, default False
        Whether the layer uses a bias vector.
    activation : function or str or None, default nn.ReLU(inplace=True)
        Activation function or name of activation function.
    """
    return ConvBlock(
        input=input,
        out_channels=out_channels,
        kernel_size=7,
        stride=stride,
        padding=padding,
        bias=bias,
        bn_eps=bn_eps,
        activation=activation)


def dwconv3x3_block(input,
                    out_channels,
                    stride=1,
                    padding=1,
                    dilation=1,
                    bias=False,
                    bn_eps=1e-5,
                    activation=(lambda: "relu")):
    """
    3x3 depthwise version of the standard convolution block.

    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    stride : int or tuple/list of 2 int, default 1
        Strides of the convolution.
    padding : int or tuple/list of 2 int, default 1
        Padding value for convolution layer.
    dilation : int or tuple/list of 2 int, default 1
        Dilation value for convolution layer.
    bias : bool, default False
        Whether the layer uses a bias vector.
    activation : function or str or None, default nn.ReLU(inplace=True)
        Activation function or name of activation function.
    """
    return conv3x3_block(
        input=input,
        out_channels=out_channels,
        stride=stride,
        padding=padding,
        dilation=dilation,
        groups=out_channels,
        bias=bias,
        bn_eps=bn_eps,
        activation=activation)


def dwconv5x5_block(input,
                    out_channels,
                    stride=1,
                    padding=2,
                    dilation=1,
                    bias=False,
                    bn_eps=1e-5,
                    activation=(lambda: "relu")):
    """
    5x5 depthwise version of the standard convolution block.

    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    stride : int or tuple/list of 2 int, default 1
        Strides of the convolution.
    padding : int or tuple/list of 2 int, default 2
        Padding value for convolution layer.
    dilation : int or tuple/list of 2 int, default 1
        Dilation value for convolution layer.
    bias : bool, default False
        Whether the layer uses a bias vector.
    activation : function or str or None, default nn.ReLU(inplace=True)
        Activation function or name of activation function.
    """
    return conv5x5_block(
        input=input,
        out_channels=out_channels,
        stride=stride,
        padding=padding,
        dilation=dilation,
        groups=out_channels,
        bias=bias,
        bn_eps=bn_eps,
        activation=activation)


def PreConvBlock(input,
                out_channels,
                kernel_size,
                stride,
                padding,
                dilation=1,
                bias=False,
                return_preact=False,
                activate=True):
    """
    Convolution block with Batch normalization and ReLU pre-activation.

    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    kernel_size : int or tuple/list of 2 int
        Convolution window size.
    stride : int or tuple/list of 2 int
        Strides of the convolution.
    padding : int or tuple/list of 2 int
        Padding value for convolution layer.
    dilation : int or tuple/list of 2 int, default 1
        Dilation value for convolution layer.
    bias : bool, default False
        Whether the layer uses a bias vector.
    return_preact : bool, default False
        Whether return pre-activation. It's used by PreResNet.
    activate : bool, default True
        Whether activate the convolution block.
    """
    batchnorm = BatchNorm_Scale(input)
    if activate:
        relu = get_activation_layer(batchnorm,"relu")
    else:
        relu = batchnorm

    if bias:
        conv = L.Convolution(
            relu,
            kernel_size=kernel_size,
            stride=stride,
            num_output=out_channels,
            bias_term=True,
            pad=padding,
            dilation=dilation,
            weight_filler=dict(type='xavier'),
            bias_filler=dict(type='constant', value=0))
    else:
        conv = L.Convolution(
            relu,
            kernel_size=kernel_size,
            stride=stride,
            num_output=out_channels,
            pad=padding,
            dilation=dilation,
            bias_term=False,
            weight_filler=dict(type='xavier'))
    if return_preact:
        return conv,relu
    else:
        return conv


def pre_conv1x1_block(input,
                      out_channels,
                      stride=1,
                      bias=False,
                      return_preact=False,
                      activate=True):
    """
    1x1 version of the pre-activated convolution block.

    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    stride : int or tuple/list of 2 int, default 1
        Strides of the convolution.
    bias : bool, default False
        Whether the layer uses a bias vector.
    return_preact : bool, default False
        Whether return pre-activation.
    activate : bool, default True
        Whether activate the convolution block.
    """
    return PreConvBlock(
        input=input,
        out_channels=out_channels,
        kernel_size=1,
        stride=stride,
        padding=0,
        bias=bias,
        return_preact=return_preact,
        activate=activate)


def pre_conv3x3_block(input,
                      out_channels,
                      stride=1,
                      padding=1,
                      dilation=1,
                      return_preact=False,
                      activate=True):
    """
    3x3 version of the pre-activated convolution block.

    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    stride : int or tuple/list of 2 int, default 1
        Strides of the convolution.
    padding : int or tuple/list of 2 int, default 1
        Padding value for convolution layer.
    dilation : int or tuple/list of 2 int, default 1
        Dilation value for convolution layer.
    return_preact : bool, default False
        Whether return pre-activation.
    activate : bool, default True
        Whether activate the convolution block.
    """
    return PreConvBlock(
        input=input,
        out_channels=out_channels,
        kernel_size=3,
        stride=stride,
        padding=padding,
        dilation=dilation,
        return_preact=return_preact,
        activate=activate)


def channel_shuffle(input,
                    groups):
    """
    Channel shuffle operation from 'ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices,'
    https://arxiv.org/abs/1707.01083.

    Parameters:
    ----------
    x : Tensor
        Input tensor.
    groups : int
        Number of groups.

    Returns
    -------
    Tensor
        Resulted tensor.
    """
    return L.ShuffleChannel(input,group=groups)


def ChannelShuffle(input,
                channels,
                groups):
    """
    Channel shuffle layer. This is a wrapper over the same operation. It is designed to save the number of groups.

    Parameters:
    ----------
    channels : int
        Number of channels.
    groups : int
        Number of groups.
    """
    # assert (channels % groups == 0)
    if channels % groups != 0:
        raise ValueError('channels must be divisible by groups')
    return channel_shuffle(input, groups)


def channel_shuffle2(input,
                     groups):
    """
    Channel shuffle operation from 'ShuffleNet: An Extremely Efficient Convolutional Neural Network for Mobile Devices,'
    https://arxiv.org/abs/1707.01083. The alternative version.

    Parameters:
    ----------
    x : Tensor
        Input tensor.
    groups : int
        Number of groups.

    Returns
    -------
    Tensor
        Resulted tensor.
    """
    return L.ShuffleChannel(input,group=groups)


def ChannelShuffle2(input,
                channels,
                groups):
    """
    Channel shuffle layer. This is a wrapper over the same operation. It is designed to save the number of groups.
    The alternative version.

    Parameters:
    ----------
    channels : int
        Number of channels.
    groups : int
        Number of groups.
    """

    # assert (channels % groups == 0)
    if channels % groups != 0:
        raise ValueError('channels must be divisible by groups')
    return channel_shuffle2(input, groups)


def SEBlock(input,
            channels,
            reduction=16,
            approx_sigmoid=False,
            activation=(lambda: "relu")):
    """
    Squeeze-and-Excitation block from 'Squeeze-and-Excitation Networks,' https://arxiv.org/abs/1709.01507.

    Parameters:
    ----------
    channels : int
        Number of channels.
    reduction : int, default 16
        Squeeze reduction value.
    approx_sigmoid : bool, default False
        Whether to use approximated sigmoid function.
    activation : function, or str, or nn.Module
        Activation function or name of activation function.
    """
    # Channel-wise multiply of the feature maps by the SE weights, using a custom y = a*x layer.
    # Reference: https://github.com/hujie-frank/SENet
    # The global pooling here also has an optimized variant.
    mid_channels = channels // reduction
    pool = L.Pooling(input, global_pooling=True, pool=P.Pooling.AVE)
    conv1 = conv1x1(pool,
                    out_channels=mid_channels,
                    bias=True)
    activ = get_activation_layer(conv1,activation)
    conv2 = conv1x1(activ,
                    out_channels=channels,
                    bias=True)
    sigmoid = HSigmoid(conv2) if approx_sigmoid else L.Sigmoid(conv2)
    #return L.Eltwise(
    #    input, 
    #    sigmoid, 
    #    eltwise_param=dict(operation=0))
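    # L.Axy is a custom channel-wise multiply layer (y = a * x; see the SENet repo above).
    # If it is not compiled into your Caffe, a common stand-in is Flatten followed by a
    # two-bottom Scale layer (a sketch, not what this file actually generates):
    #   flat = L.Flatten(sigmoid)  # (N, C, 1, 1) -> (N, C)
    #   return L.Scale(input, flat, scale_param=dict(axis=0))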
    return L.Axy(
        sigmoid, 
        input)
"""
    EfficientNet for ImageNet-1K, implemented in PyTorch.
    Original paper: 'EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks,'
    https://arxiv.org/abs/1905.11946.
"""

__all__ = ['EfficientNet', 'efficientnet_b0', 'efficientnet_b1', 'efficientnet_b2', 'efficientnet_b3',
           'efficientnet_b4', 'efficientnet_b5', 'efficientnet_b6', 'efficientnet_b7', 'efficientnet_b0b',
           'efficientnet_b1b', 'efficientnet_b2b', 'efficientnet_b3b']

import os
import math
from inspect import isfunction

import caffe
from caffe import layers as L
from caffe import params as P

from caffe.proto import caffe_pb2
from caffe import to_proto
from google.protobuf import text_format

from common import * 

def calc_tf_padding(x,
                    in_channels,
                    kernel_size,
                    stride=1,
                    dilation=1):
    """
    Calculate TF-same like padding size.

    Parameters:
    ----------
    x : tensor
        Input tensor.
    kernel_size : int
        Convolution window size.
    stride : int, default 1
        Strides of the convolution.
    dilation : int, default 1
        Dilation value for convolution layer.

    Returns
    -------
    tuple of 4 int
        The size of the padding.
    """
    height, width = in_channels,in_channels
    oh = math.ceil(height / stride)
    ow = math.ceil(width / stride)
    pad_h = max((oh - 1) * stride + (kernel_size - 1) * dilation + 1 - height, 0)
    pad_w = max((ow - 1) * stride + (kernel_size - 1) * dilation + 1 - width, 0)
    return pad_h // 2, pad_h - pad_h // 2, pad_w // 2, pad_w - pad_w // 2
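
# Worked example: with a 224x224 input, kernel_size=3 and stride=2 give oh = ow = 112
# and pad_h = pad_w = 1, so calc_tf_padding(None, 224, kernel_size=3, stride=2)
# returns (0, 1, 0, 1) -- the extra pixel goes on the bottom/right, as in TF "SAME".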


def round_channels(channels,
                   factor,
                   divisor=8):
    """
    Round weighted channel number.

    Parameters:
    ----------
    channels : int
        Original number of channels.
    factor : float
        Weight factor.
    divisor : int
        Alignment value.

    Returns
    -------
    int
        Weighted number of channels.
    """
    channels *= factor
    new_channels = max(int(channels + divisor / 2.0) // divisor * divisor, divisor)
    if new_channels < 0.9 * channels:
        new_channels += divisor
    return new_channels
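
# Worked example: round_channels(32, 1.4) -> 48
# (32 * 1.4 = 44.8; rounding to the nearest multiple of 8 gives 48, and since
# 48 >= 0.9 * 44.8 no extra divisor is added).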


def EffiDwsConvUnit(input,
                    in_channels,
                    out_channels,
                    stride,
                    bn_eps,
                    activation,
                    tf_mode):
    """
    EfficientNet specific depthwise separable convolution block/unit with BatchNorms and activations at each convolution
    layers.

    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    stride : int or tuple/list of 2 int
        Strides of the second convolution layer.
    bn_eps : float
        Small float added to variance in Batch norm.
    activation : str
        Name of activation function.
    tf_mode : bool
        Whether to use TF-like mode.
    """
    residual = (in_channels == out_channels) and (stride == 1)
    if residual:
        identity = input

    #if self.tf_mode:
        #x = F.pad(x, pad=calc_tf_padding(x, kernel_size=3))

    dw_conv = dwconv3x3_block(
        input = input,
        out_channels=in_channels,
        padding=(0 if tf_mode else 1),
        bn_eps=bn_eps,
        activation=activation)

    se = SEBlock(
        input = dw_conv,
        channels=in_channels,
        reduction=4,
        activation=activation)

    pw_conv = conv1x1_block(
        input = se,
        out_channels=out_channels,
        bn_eps=bn_eps,
        activation=None)

    #ynh
    if residual:
        return L.Eltwise(
            pw_conv, 
            identity, 
            eltwise_param=dict(operation=1))
    else:
        return pw_conv

def EffiInvResUnit(input,
                in_channels,
                out_channels,
                kernel_size,
                stride,
                expansion_factor,
                bn_eps,
                activation,
                tf_mode):
    """
    EfficientNet inverted residual unit.

    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    kernel_size : int or tuple/list of 2 int
        Convolution window size.
    stride : int or tuple/list of 2 int
        Strides of the second convolution layer.
    expansion_factor : int
        Factor for expansion of channels.
    bn_eps : float
        Small float added to variance in Batch norm.
    activation : str
        Name of activation function.
    tf_mode : bool
        Whether to use TF-like mode.
    """
    residual = (in_channels == out_channels) and (stride == 1)
    mid_channels = in_channels * expansion_factor
    dwconv_block_fn = dwconv3x3_block if kernel_size == 3 else (dwconv5x5_block if kernel_size == 5 else None)

    if residual:
        identity = input

    conv1 = conv1x1_block(
                        input = input,
                        out_channels=mid_channels,
                        bn_eps=bn_eps,
                        activation=activation)
    #if self.tf_mode:
        #x = F.pad(x, pad=calc_tf_padding(x, kernel_size=self.kernel_size, stride=self.stride))

    conv2 = dwconv_block_fn(
        input = conv1,
        out_channels=mid_channels,
        stride=stride,
        padding=(0 if tf_mode else (kernel_size // 2)),
        bn_eps=bn_eps,
        activation=activation)

    se = SEBlock(
        input = conv2,
        channels=mid_channels,
        reduction=24,
        activation=activation)

    conv3 = conv1x1_block(
        input = se,
        out_channels=out_channels,
        bn_eps=bn_eps,
        activation=None)

    #ynh long
    if residual:
        return L.Eltwise(
            conv3, 
            identity, 
            eltwise_param=dict(operation=1))
    else:
        return conv3

def EffiInitBlock(input,
                in_channels,
                out_channels,
                bn_eps,
                activation,
                tf_mode):
    """
    EfficientNet specific initial block.

    Parameters:
    ----------
    in_channels : int
        Number of input channels.
    out_channels : int
        Number of output channels.
    bn_eps : float
        Small float added to variance in Batch norm.
    activation : str
        Name of activation function.
    tf_mode : bool
        Whether to use TF-like mode.
    """
    #pad = calc_tf_padding(input, in_channels, kernel_size=3, stride=2)
    #if tf_mode:
        #m_pad = F.pad(input, in_channels, pad=calc_tf_padding(input, kernel_size=3, stride=2))
    #else:
        #m_pad = input
    m_pad = input
    return conv3x3_block(input = m_pad,
                        out_channels=out_channels,
                        stride=2,
                        padding=(0 if tf_mode else 1),
                        bn_eps=bn_eps,
                        activation=activation)


def EfficientNet(input,
                channels,
                init_block_channels,
                final_block_channels,
                kernel_sizes,
                strides_per_stage,
                expansion_factors,
                dropout_rate=0.2,
                tf_mode=False,
                bn_eps=1e-5,
                in_channels=3,
                in_size=(224, 224),
                num_classes=1000):
    """
    EfficientNet(-B0) model from 'EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks,'
    https://arxiv.org/abs/1905.11946.

    Parameters:
    ----------
    channels : list of list of int
        Number of output channels for each unit.
    init_block_channels : int
        Number of output channels for initial unit.
    final_block_channels : int
        Number of output channels for the final block of the feature extractor.
    kernel_sizes : list of list of int
        Number of kernel sizes for each unit.
    strides_per_stage : list of int
        Stride value for the first unit of each stage.
    expansion_factors : list of list of int
        Number of expansion factors for each unit.
    dropout_rate : float, default 0.2
        Fraction of the input units to drop. Must be a number between 0 and 1.
    tf_mode : bool, default False
        Whether to use TF-like mode.
    bn_eps : float, default 1e-5
        Small float added to variance in Batch norm.
    in_channels : int, default 3
        Number of input channels.
    in_size : tuple of two ints, default (224, 224)
        Spatial size of the expected input image.
    num_classes : int, default 1000
        Number of classification classes.
    """
    activation = "swish"
    eff = EffiInitBlock(
            input = input,
            in_channels=in_channels,
            out_channels=init_block_channels,
            bn_eps=bn_eps,
            activation=activation,
            tf_mode=tf_mode)
    #test code
    #with open("./ynh.prototxt", 'w') as f:
    #    f.write(str(caffe.to_proto(eff)))
        
    in_channels = init_block_channels
    for i, channels_per_stage in enumerate(channels):
        kernel_sizes_per_stage = kernel_sizes[i]
        expansion_factors_per_stage = expansion_factors[i]
        for j, out_channels in enumerate(channels_per_stage):
            kernel_size = kernel_sizes_per_stage[j]
            expansion_factor = expansion_factors_per_stage[j]
            stride = strides_per_stage[i] if (j == 0) else 1
            if i == 0:
                eff = EffiDwsConvUnit(
                                input = eff,
                                in_channels=in_channels,
                                out_channels=out_channels,
                                stride=stride,
                                bn_eps=bn_eps,
                                activation=activation,
                                tf_mode=tf_mode
                )
            else:
                eff = EffiInvResUnit(
                    input = eff,
                    in_channels=in_channels,
                    out_channels=out_channels,
                    kernel_size=kernel_size,
                    stride=stride,
                    expansion_factor=expansion_factor,
                    bn_eps=bn_eps,
                    activation=activation,
                    tf_mode=tf_mode
                )
            in_channels = out_channels
    eff = conv1x1_block(
                input = eff,
                out_channels=final_block_channels,
                bn_eps=bn_eps,
                activation=activation)
    
    in_channels = final_block_channels
    eff = L.Pooling(eff, global_pooling=True, pool=P.Pooling.AVE)
    if dropout_rate > 0.0:
        eff = L.Dropout(eff,dropout_ratio=dropout_rate)
    eff = L.InnerProduct(eff, num_output=num_classes,weight_filler=dict(type='xavier'))
    return eff


def get_efficientnet(input,
                     version,
                     in_size,
                     tf_mode=False,
                     bn_eps=1e-5,
                     model_name=None,
                     **kwargs):
    """
    Create EfficientNet model with specific parameters.

    Parameters:
    ----------
    version : str
        Version of EfficientNet ('b0'...'b7').
    in_size : tuple of two ints
        Spatial size of the expected input image.
    tf_mode : bool, default False
        Whether to use TF-like mode.
    bn_eps : float, default 1e-5
        Small float added to variance in Batch norm.
    model_name : str or None, default None
        Model name for loading pretrained model.
    """

    if version == "b0":
        assert (in_size == (224, 224))
        depth_factor = 1.0
        width_factor = 1.0
        dropout_rate = 0.2
    elif version == "b1":
        assert (in_size == (240, 240))
        depth_factor = 1.1
        width_factor = 1.0
        dropout_rate = 0.2
    elif version == "b2":
        assert (in_size == (260, 260))
        depth_factor = 1.2
        width_factor = 1.1
        dropout_rate = 0.3
    elif version == "b3":
        assert (in_size == (300, 300))
        depth_factor = 1.4
        width_factor = 1.2
        dropout_rate = 0.3
    elif version == "b4":
        assert (in_size == (380, 380))
        depth_factor = 1.8
        width_factor = 1.4
        dropout_rate = 0.4
    elif version == "b5":
        assert (in_size == (456, 456))
        depth_factor = 2.2
        width_factor = 1.6
        dropout_rate = 0.4
    elif version == "b6":
        assert (in_size == (528, 528))
        depth_factor = 2.6
        width_factor = 1.8
        dropout_rate = 0.5
    elif version == "b7":
        assert (in_size == (600, 600))
        depth_factor = 3.1
        width_factor = 2.0
        dropout_rate = 0.5
    else:
        raise ValueError("Unsupported EfficientNet version {}".format(version))

    init_block_channels = 32
    layers = [1, 2, 2, 3, 3, 4, 1]
    downsample = [1, 1, 1, 1, 0, 1, 0]
    channels_per_layers = [16, 24, 40, 80, 112, 192, 320]
    expansion_factors_per_layers = [1, 6, 6, 6, 6, 6, 6]
    kernel_sizes_per_layers = [3, 3, 5, 3, 5, 5, 3]
    strides_per_stage = [1, 2, 2, 2, 1, 2, 1]
    final_block_channels = 1280

    layers = [int(math.ceil(li * depth_factor)) for li in layers]
    channels_per_layers = [round_channels(ci, width_factor) for ci in channels_per_layers]

    from functools import reduce
    channels = reduce(lambda x, y: x + [[y[0]] * y[1]] if y[2] != 0 else x[:-1] + [x[-1] + [y[0]] * y[1]],
                      zip(channels_per_layers, layers, downsample), [])
    kernel_sizes = reduce(lambda x, y: x + [[y[0]] * y[1]] if y[2] != 0 else x[:-1] + [x[-1] + [y[0]] * y[1]],
                          zip(kernel_sizes_per_layers, layers, downsample), [])
    expansion_factors = reduce(lambda x, y: x + [[y[0]] * y[1]] if y[2] != 0 else x[:-1] + [x[-1] + [y[0]] * y[1]],
                               zip(expansion_factors_per_layers, layers, downsample), [])
    strides_per_stage = reduce(lambda x, y: x + [[y[0]] * y[1]] if y[2] != 0 else x[:-1] + [x[-1] + [y[0]] * y[1]],
                               zip(strides_per_stage, layers, downsample), [])
    strides_per_stage = [si[0] for si in strides_per_stage]
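    # For reference, with EfficientNet-B0 (depth_factor = width_factor = 1.0) the reduce
    # calls above merge the stages where downsample == 0 and produce:
    #   channels          = [[16], [24, 24], [40, 40],
    #                        [80, 80, 80, 112, 112, 112],
    #                        [192, 192, 192, 192, 320]]
    #   strides_per_stage = [1, 2, 2, 2, 2]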

    init_block_channels = round_channels(init_block_channels, width_factor)

    if width_factor > 1.0:
        assert (int(final_block_channels * width_factor) == round_channels(final_block_channels, width_factor))
        final_block_channels = round_channels(final_block_channels, width_factor)

    net = EfficientNet(
        input = input,
        channels=channels,
        init_block_channels=init_block_channels,
        final_block_channels=final_block_channels,
        kernel_sizes=kernel_sizes,
        strides_per_stage=strides_per_stage,
        expansion_factors=expansion_factors,
        dropout_rate=dropout_rate,
        tf_mode=tf_mode,
        bn_eps=bn_eps,
        in_size=in_size,
        **kwargs)
    return net


def efficientnet_b0(input, in_size=(224, 224), **kwargs):
    """
    EfficientNet-B0 model from 'EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks,'
    https://arxiv.org/abs/1905.11946.

    Parameters:
    ----------
    in_size : tuple of two ints, default (224, 224)
        Spatial size of the expected input image.
    """
    return get_efficientnet(input = input, version="b0", in_size=in_size, model_name="efficientnet_b0", **kwargs)


def efficientnet_b1(input, in_size=(240, 240), **kwargs):
    """
    EfficientNet-B1 model from 'EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks,'
    https://arxiv.org/abs/1905.11946.

    Parameters:
    ----------
    in_size : tuple of two ints, default (240, 240)
        Spatial size of the expected input image.
    """
    return get_efficientnet(input = input, version="b1", in_size=in_size, model_name="efficientnet_b1", **kwargs)


def efficientnet_b2(input, in_size=(260, 260), **kwargs):
    """
    EfficientNet-B2 model from 'EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks,'
    https://arxiv.org/abs/1905.11946.

    Parameters:
    ----------
    in_size : tuple of two ints, default (260, 260)
        Spatial size of the expected input image.
    """
    return get_efficientnet(input = input, version="b2", in_size=in_size, model_name="efficientnet_b2", **kwargs)


def efficientnet_b3(input, in_size=(300, 300), **kwargs):
    """
    EfficientNet-B3 model from 'EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks,'
    https://arxiv.org/abs/1905.11946.

    Parameters:
    ----------
    in_size : tuple of two ints, default (300, 300)
        Spatial size of the expected input image.
    """
    return get_efficientnet(input = input, version="b3", in_size=in_size, model_name="efficientnet_b3", **kwargs)


def efficientnet_b4(input, in_size=(380, 380), **kwargs):
    """
    EfficientNet-B4 model from 'EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks,'
    https://arxiv.org/abs/1905.11946.

    Parameters:
    ----------
    in_size : tuple of two ints, default (380, 380)
        Spatial size of the expected input image.
    """
    return get_efficientnet(input = input, version="b4", in_size=in_size, model_name="efficientnet_b4", **kwargs)


def efficientnet_b5(input, in_size=(456, 456), **kwargs):
    """
    EfficientNet-B5 model from 'EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks,'
    https://arxiv.org/abs/1905.11946.

    Parameters:
    ----------
    in_size : tuple of two ints, default (456, 456)
        Spatial size of the expected input image.
    """
    return get_efficientnet(input = input, version="b5", in_size=in_size, model_name="efficientnet_b5", **kwargs)


def efficientnet_b6(input, in_size=(528, 528), **kwargs):
    """
    EfficientNet-B6 model from 'EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks,'
    https://arxiv.org/abs/1905.11946.

    Parameters:
    ----------
    in_size : tuple of two ints, default (528, 528)
        Spatial size of the expected input image.
    """
    return get_efficientnet(input = input, version="b6", in_size=in_size, model_name="efficientnet_b6", **kwargs)


def efficientnet_b7(input, in_size=(600, 600), **kwargs):
    """
    EfficientNet-B7 model from 'EfficientNet: Rethinking Model Scaling for Convolutional Neural Networks,'
    https://arxiv.org/abs/1905.11946.

    Parameters:
    ----------
    in_size : tuple of two ints, default (600, 600)
        Spatial size of the expected input image.
    """
    return get_efficientnet(input = input, version="b7", in_size=in_size, model_name="efficientnet_b7", **kwargs)


def efficientnet_b0b(input, in_size=(224, 224), **kwargs):
    """
    EfficientNet-B0-b (like TF-implementation) model from 'EfficientNet: Rethinking Model Scaling for Convolutional
    Neural Networks,' https://arxiv.org/abs/1905.11946.

    Parameters:
    ----------
    in_size : tuple of two ints, default (224, 224)
        Spatial size of the expected input image.
    """
    return get_efficientnet(input = input, version="b0", in_size=in_size, tf_mode=True, bn_eps=1e-3, model_name="efficientnet_b0b",
                            **kwargs)


def efficientnet_b1b(input, in_size=(240, 240), **kwargs):
    """
    EfficientNet-B1-b (like TF-implementation) model from 'EfficientNet: Rethinking Model Scaling for Convolutional
    Neural Networks,' https://arxiv.org/abs/1905.11946.

    Parameters:
    ----------
    in_size : tuple of two ints, default (240, 240)
        Spatial size of the expected input image.
    """
    return get_efficientnet(input = input, version="b1", in_size=in_size, tf_mode=True, bn_eps=1e-3, model_name="efficientnet_b1b",
                            **kwargs)


def efficientnet_b2b(input, in_size=(260, 260), **kwargs):
    """
    EfficientNet-B2-b (like TF-implementation) model from 'EfficientNet: Rethinking Model Scaling for Convolutional
    Neural Networks,' https://arxiv.org/abs/1905.11946.

    Parameters:
    ----------
    in_size : tuple of two ints, default (260, 260)
        Spatial size of the expected input image.
    """
    return get_efficientnet(input = input, version="b2", in_size=in_size, tf_mode=True, bn_eps=1e-3, model_name="efficientnet_b2b",
                            **kwargs)


def efficientnet_b3b(input, in_size=(300, 300), **kwargs):
    """
    EfficientNet-B3-b (like TF-implementation) model from 'EfficientNet: Rethinking Model Scaling for Convolutional
    Neural Networks,' https://arxiv.org/abs/1905.11946.

    Parameters:
    ----------
    in_size : tuple of two ints, default (300, 300)
        Spatial size of the expected input image.
    """
    return get_efficientnet(input = input, version="b3", in_size=in_size, tf_mode=True, bn_eps=1e-3, model_name="efficientnet_b3b",
                            **kwargs)

 
