ncnn源码分析1

最新推荐文章于 2024-07-23 12:51:16 发布

一名小菜鸟的学习之路

最新推荐文章于 2024-07-23 12:51:16 发布

阅读量2k

点赞数 3

分类专栏：ncnn学习笔记 文章标签：c++ 深度学习 神经网络 caffe 机器学习

原文链接：https://blog.csdn.net/sinat_31425585/article/details/100565943

版权

ncnn学习笔记专栏收录该内容

2 篇文章 2 订阅

订阅专栏

前段时间，分别尝试了腾讯开源的深度学习推理框架ncnn、陈天奇大神团队开源的tvm，以及阿里最新开源的mnn。就好用程度来说，腾讯的ncnn是当之无愧的第一名。这里大致写一下源码学习的心得体会，方便后面进一步学习。

ncnn接口函数

在使用ncnn来部署模型时，我们会预先定义一个Net对象，然后使用load_param和load_model两个接口载入模型结构参数和模型权重参数：

// Create an empty network object
ncnn::Net net;
// Load the network structure from the plain-text param file
net.load_param("mobilenet_yolo.param");
// Load the network weights from the binary model file
net.load_model("mobilenet_yolo.bin");

这里，我们打开ncnn源码中src子文件夹下面的net.h和net.cpp文件，可以看到：

// Net owns the blobs and layers of a network and exposes the loaders for the
// network structure (param) and the weight data (model), plus a factory for
// Extractor objects.
class Net
{
public:
    // empty init
    Net();
    // clear and destroy
    ~Net();
 
public:
    // option can be changed before loading
    // (some network settings can be adjusted through opt before loading)
    Option opt;
 
 
#if NCNN_STRING
    // register custom layer by layer type name
    // return 0 if success
    // (the layer is identified by its string type name)
    int register_custom_layer(const char* type, layer_creator_func creator);
#endif // NCNN_STRING
    // register custom layer by layer type
    // return 0 if success
    // (the layer is identified by an int layer index)
    int register_custom_layer(int index, layer_creator_func creator);
 
#if NCNN_STDIO
#if NCNN_STRING
    // load network structure from plain param file
    // return 0 if success
    // overload taking an already-open FILE pointer
    int load_param(FILE* fp);
    // overload taking the path of the param file
    int load_param(const char* protopath);
    // overload taking the param text held in memory
    int load_param_mem(const char* mem);
#endif // NCNN_STRING
    // load network structure from binary param file
    // return 0 if success
    // overload taking an already-open FILE pointer
    int load_param_bin(FILE* fp);
    // overload taking the path of the binary param file
    int load_param_bin(const char* protopath);
 
    // load network weight data from model file
    // return 0 if success
    // overload taking an already-open FILE pointer
    int load_model(FILE* fp);
    // overload taking the path of the binary model file
    int load_model(const char* modelpath);
#endif // NCNN_STDIO
 
    // load network structure from external memory
    // memory pointer must be 32-bit aligned
    // return bytes consumed
    int load_param(const unsigned char* mem);
 
    // reference network weight data from external memory
    // weight data is not copied but referenced
    // so external memory should be retained when used
    // memory pointer must be 32-bit aligned
    // return bytes consumed
    int load_model(const unsigned char* mem);
 
    // unload network structure and weight data
    void clear();
 
    // construct an Extractor from network
    Extractor create_extractor() const;
 
protected:
    // parse the structure of network
    // fuse int8 op dequantize and quantize by requantize
    // (i.e. a network fusion pass, not "network reuse")
    int fuse_network();
 
    // Extractor is a friend, so it can use the protected members below
    friend class Extractor;
#if NCNN_STRING
    // look up the index of a blob by its name
    int find_blob_index_by_name(const char* name) const;
    // look up the index of a layer by its name
    int find_layer_index_by_name(const char* name) const;
    // map a custom layer type name to its index
    int custom_layer_to_index(const char* type);
    // create a custom layer from its type name
    Layer* create_custom_layer(const char* type);
#endif // NCNN_STRING
    // create a custom layer from its index
    Layer* create_custom_layer(int index);
    // run the forward pass of the layer at layer_index
    int forward_layer(int layer_index, std::vector<Mat>& blob_mats, Option& opt) const;
 
protected:
    // blobs && layers
    std::vector<Blob> blobs;
    std::vector<Layer*> layers;
 
    // registry entries for custom layers
    std::vector<layer_registry_entry> custom_layer_registry;
};

代码中已经将Vulkan相关代码剔除掉了。这里可以看到前面用到的load_param和load_model接口，我们传入的是const char*类型的参数。

然后，我们打开net.cpp文件：

// Load the network structure from the param file at `protopath`.
// Returns -1 when the file cannot be opened; otherwise returns whatever
// load_param(FILE*) reports for the opened file.
int Net::load_param(const char* protopath)
{
    if (FILE* param_file = fopen(protopath, "rb"))
    {
        // Delegate the actual parsing to the FILE* overload, then release the handle.
        const int status = load_param(param_file);
        fclose(param_file);
        return status;
    }

    // Opening failed: report the offending path and signal the error.
    fprintf(stderr, "fopen %s failed\n", protopath);
    return -1;
}

参数载入接口中，调用了另外一个参数载入接口：load_param(FILE* fp)

// Load the network parameters from an already-open FILE pointer
// (only the signature is quoted here; the body is walked through piece by piece below)
int Net::load_param(FILE* fp)

这里可以对照着我们的param列表来读：

7767517
24 25
Input            data                             0 1 data
Convolution      conv1                            1 1 data conv1 0=64 1=3 11=3 5=1 6=1728
PReLU            prelu1                           1 1 conv1 prelu1 0=64
Pooling          pool1                            1 1 prelu1 pool1 0=1 1=3 2=2 4=0 5=0
ConvolutionDepthWise conv2_dw                         1 1 pool1 conv2_dw 0=64 1=3 11=3 5=1 6=576 7=64
PReLU            prelu2_dw                        1 1 conv2_dw prelu2_dw 0=64
Convolution      conv2_sep                        1 1 prelu2_dw conv2_sep 0=128 1=1 11=1 5=1 6=8192
PReLU            prelu2_sep                       1 1 conv2_sep prelu2_sep 0=128
Pooling          pool2                            1 1 prelu2_sep pool2 0=1 1=3 2=2 4=0 5=0
ConvolutionDepthWise conv3_dw                         1 1 pool2 conv3_dw 0=128 1=3 11=3 5=1 6=1152 7=128
PReLU            prelu3_dw                        1 1 conv3_dw prelu3_dw 0=128
Convolution      conv3_sep                        1 1 prelu3_dw conv3_sep 0=256 1=1 11=1 5=1 6=32768
PReLU            prelu3_sep                       1 1 conv3_sep prelu3_sep 0=256
Pooling          pool3                            1 1 prelu3_sep pool3 0=1 1=2 2=2 4=0 5=0
ConvolutionDepthWise conv4_dw                         1 1 pool3 conv4_dw 0=256 1=2 11=2 5=1 6=1024 7=256
PReLU            prelu4_dw                        1 1 conv4_dw prelu4_dw 0=256
Convolution      conv4_sep                        1 1 prelu4_dw conv4_sep 0=512 1=1 11=1 5=1 6=131072
PReLU            prelu4_sep                       1 1 conv4_sep prelu4_sep 0=512
ConvolutionDepthWise conv5_dw                         1 1 prelu4_sep conv5_dw 0=512 1=3 11=3 5=1 6=4608 7=512
PReLU            prelu5_dw                        1 1 conv5_dw prelu5_dw 0=512
Convolution      conv5_sep                        1 1 prelu5_dw conv5_sep 0=512 1=1 11=1 5=1 6=262144
PReLU            prelu5_sep                       1 1 conv5_sep prelu5_sep 0=512
InnerProduct     conv6_3                          1 1 prelu5_sep conv6_3 0=212 1=1 2=108544
BatchNorm        bn6_3                            1 1 conv6_3 bn6_3 0=212

进入load_param接口后：

（1）首先读取magic数，通过判断magic数是否等于7767517，就可以确定当前param文件是否为最新版本的param文件：

    int magic = 0;
    // Read the magic number from the start of the param file
    int nbr = fscanf(fp, "%d", &magic);
    // fscanf did not convert exactly one integer: the read failed
    if (nbr != 1)
    {
        fprintf(stderr, "issue with param file\n");
        return -1;
    }
    // 7767517 is the magic number of the current param format;
    // any other value is rejected as an outdated file
    if (magic != 7767517)
    {
        fprintf(stderr, "param is too old, please regenerate\n");
        return -1;
    }

（2）解析出网络的layer层数及blob数：

    // Parse the layer section header
    int layer_count = 0;
    int blob_count = 0;
    // Read the number of layers and the number of blobs
    nbr = fscanf(fp, "%d %d", &layer_count, &blob_count);
    // Fail if either value could not be read or is not positive
    if (nbr != 2 || layer_count <= 0 || blob_count <= 0)
    {
        fprintf(stderr, "issue with param file\n");
        return -1;
    }
 
    // Size the network's layers and blobs vectors to the declared counts
    layers.resize((size_t)layer_count);
    blobs.resize((size_t)blob_count);

（3）遍历所有的layer，解析每个layer层的类型（layer_type）、名称（layer_name）、输入数（bottom_count）和输出数（top_count）：

        int nscan = 0;
 
        // Buffers for the layer type and layer name (up to 256 chars plus NUL)
        char layer_type[257];
        char layer_name[257];
        int bottom_count = 0;
        int top_count = 0;
        // Read the layer type, layer name, number of inputs (bottoms) and number of outputs (tops)
        nscan = fscanf(fp, "%256s %256s %d %d", layer_type, layer_name, &bottom_count, &top_count);
        // If not all four fields were parsed, skip to the next iteration of the
        // enclosing loop (the loop itself is outside this excerpt)
        if (nscan != 4)
        {
            continue;
        }

（4）根据layer的类型，创建layer

        // Try to create the layer from its type name
        Layer* layer = create_layer(layer_type);
        // create_layer returned null: layer_type is not one it knows
        if (!layer)
        {   
            // Fall back to the custom layers registered on this Net
            layer = create_custom_layer(layer_type);
        }
        // Still null: no custom layer of this type either
        if (!layer)
        {
            fprintf(stderr, "layer %s not exists or registered\n", layer_type);
            // Drop whatever has been loaded so far and report failure
            clear();
            return -1;
        }

（5）设置layer参数：layer的类型、名字、输入和输出：

         // Store the parsed type and name on the layer
        layer->type = std::string(layer_type);
        layer->name = std::string(layer_name);

在设置输入时，如果当前blob名不存在，就将当前blob名添加到net的blobs数组里面，查看一下Blob定义可以看到：

// blob
// name && producer && consumers
// Records which layer produces a blob and which layers consume it.
class Blob
{
public:
    // empty
    Blob();
 
public:
#if NCNN_STRING
    // blob name
    std::string name;
#endif // NCNN_STRING
    // layer index which produce this blob as output
    // (the producer)
    int producer;
    // layer index which need this blob as input
    // (the consumers)
    std::vector<int> consumers;
};

Blob用于记录数据传输过程：producer记录当前blob是由哪一层产生的，consumers记录当前blob被哪些层作为输入使用：

        // Inputs of the layer: one slot per bottom blob
        layer->bottoms.resize(bottom_count);
 
        // Parse each input of the layer
        for (int j=0; j<bottom_count; j++)
        {
            char bottom_name[257];
            // Read the name of this bottom blob
            nscan = fscanf(fp, "%256s", bottom_name);
            // NOTE(review): on a failed read, bottoms[j] is never assigned for this j
            if (nscan != 1)
            {
                continue;
            }
 
            // Look up the index of the blob with this name
            int bottom_blob_index = find_blob_index_by_name(bottom_name);
            // No blob with this name yet:
            // claim the next unused slot in blobs and give it this name
            if (bottom_blob_index == -1)
            {
                // blob_index is declared outside this excerpt; here it is used as the
                // index of the next free slot.
                // NOTE(review): no check against blobs.size() is visible here — confirm
                Blob& blob = blobs[blob_index];
 
                // The new blob's index
                bottom_blob_index = blob_index;
 
                // Set the blob's name
                blob.name = std::string(bottom_name);
 
                // Advance the running blob index
                blob_index++;
            }
 
            // The blob (existing or newly named) that feeds this input
            Blob& blob = blobs[bottom_blob_index];
 
            // Record the data flow on the blob:
            // layer i (loop variable from outside this excerpt) takes this blob as input
            blob.consumers.push_back(i);
 
            // The j-th input of layer i is this blob
            layer->bottoms[j] = bottom_blob_index;
        }

设置输出的过程与此类似，就不重复了。最后就是载入layer的特定参数，例如前面param文件中conv1这一行末尾的参数：

     0=64 1=3 11=3 5=1 6=1728

代码如下：

        // Parse the layer-specific key=value parameters that follow the blob names
        // into pd (declared outside this excerpt; presumably a ParamDict, per the error message)
        int pdlr = pd.load_param(fp);
        if (pdlr != 0)
        {
            fprintf(stderr, "ParamDict load_param failed\n");
            // NOTE(review): continuing here skips `layers[i] = layer` below, so this
            // layer is not stored; whether it is released elsewhere is not visible here
            continue;
        }
 
        // Let the layer read its own parameters from pd
        int lr = layer->load_param(pd);
        if (lr != 0)
        {
            fprintf(stderr, "layer load_param failed\n");
            // NOTE(review): same as above — layers[i] is left unset on this path
            continue;
        }
 
        // Store the fully configured layer at position i
        layers[i] = layer;