TensorRT: Add a Custom Layer to a TensorFlow Network in TensorRT

Environment (TensorFlow 2.4.0, GPU build):

pip install tensorflow-gpu==2.4.0 -i https://pypi.tuna.tsinghua.edu.cn/simple

1 Description

  • Implement a Clip layer in C++
  • Compile it into a shared library
  • Dynamically load that library from Python
  • Register the plugin with TensorRT's PluginRegistry (a quick check of this step is sketched below)
  • Use the plugin when parsing the UFF model
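Since registration happens as a side effect of loading the shared library, it can be sanity-checked from Python before building any engine. A minimal sketch, assuming the library was built to build/libclipplugin.so and a TensorRT version whose Python bindings expose the plugin registry:

import ctypes
import tensorrt as trt

# Loading the shared library runs REGISTER_TENSORRT_PLUGIN, which adds
# ClipPluginCreator to TensorRT's global plugin registry.
ctypes.CDLL("./build/libclipplugin.so")

registry = trt.get_plugin_registry()
print([creator.name for creator in registry.plugin_creator_list])
# The printed list is expected to contain "CustomClipPlugin".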

2 Run

  1. Build the Clip plugin
mkdir build && pushd build
cmake ..
make -j
popd

Note: If any of the dependencies are not installed in their default locations, you can manually specify them. For example:

cmake .. -DPYBIND11_DIR=/path/to/pybind11/ \
         -DCMAKE_CUDA_COMPILER=/usr/local/cuda-x.x/bin/nvcc \
         -DPYTHON3_INC_DIR=/usr/include/python3.6/ \
         -DTRT_LIB=/path/to/tensorrt/lib/ \
         -DTRT_INCLUDE=/path/to/tensorrt/include/

(Alternatively, add path/to/nvcc to $PATH instead of passing -DCMAKE_CUDA_COMPILER.)
  2. Train the model

python3 lenet5.py

  3. Run inference

python3 sample.py

3 Code

clipKernel.h

#ifndef CLIP_KERNEL_H
#define CLIP_KERNEL_H
#include "NvInfer.h"

// Launches the clip kernel on `stream`: clamps `n` floats from `input` into
// [clipMin, clipMax] and writes the result to `output`. Returns 0 on success.
int clipInference(cudaStream_t stream, int n, float clipMin, float clipMax, const void* input, void* output);
#endif

clipKernel.cu

#include "clipKernel.h"

template <typename T>
__device__ __forceinline__ const T& min(const T& a, const T& b)
{
    return (a > b) ? b : a;
}

template <typename T>
__device__ __forceinline__ const T& max(const T& a, const T& b)
{
    return (a > b) ? a : b;
}

// Grid-stride loop: each thread clamps elements nthdsPerCTA * gridDim.x apart.
template <typename T, unsigned nthdsPerCTA>
__launch_bounds__(nthdsPerCTA)
    __global__ void clipKernel(int n, const T clipMin, const T clipMax, const T* input, T* output)
{
    for (int i = blockIdx.x * nthdsPerCTA + threadIdx.x; i < n; i += gridDim.x * nthdsPerCTA)
    {
        output[i] = min<T>(max<T>(input[i], clipMin), clipMax);
    }
}

int clipInference(cudaStream_t stream, int n, float clipMin, float clipMax, const void* input, void* output)
{
    const int blockSize = 512;
    const int gridSize = (n + blockSize - 1) / blockSize;
    clipKernel<float, blockSize><<<gridSize, blockSize, 0, stream>>>(n, clipMin, clipMax,
                                                                     static_cast<const float*>(input),
                                                                     static_cast<float*>(output));
    return 0;
}
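The kernel is just an element-wise clamp, so its output is easy to check against NumPy. A reference sketch (clip_reference is our own name, for illustration only):

import numpy as np

def clip_reference(x, clip_min=0.0, clip_max=6.0):
    # Same math as clipKernel: min(max(x, clipMin), clipMax), element-wise.
    return np.clip(x, clip_min, clip_max).astype(np.float32)

x = np.linspace(-3.0, 9.0, 7, dtype=np.float32)
print(clip_reference(x))  # values outside [0, 6] are clamped to the bounds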

customClipPlugin.h

#ifndef CUSTOM_CLIP_PLUGIN_H
#define CUSTOM_CLIP_PLUGIN_H
#include "NvInferPlugin.h"
#include <string>
#include <vector>

class ClipPlugin : public nvinfer1::IPluginV2
{
public:
    ClipPlugin(const std::string name, float clipMin, float clipMax);
    ClipPlugin(const std::string name, const void* data, size_t length);
    ClipPlugin() = delete;

    int getNbOutputs() const override;
    nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims) override;

    int initialize() override;
    void terminate() override;
    size_t getWorkspaceSize(int) const override { return 0; }
    int enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream) override;
    size_t getSerializationSize() const override;
    void serialize(void* buffer) const override;
    void configureWithFormat(const nvinfer1::Dims* inputs, int nbInputs, const nvinfer1::Dims* outputDims, int nbOutputs, nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize) override;
    bool supportsFormat(nvinfer1::DataType type, nvinfer1::PluginFormat format) const override;
    const char* getPluginType() const override;
    const char* getPluginVersion() const override;
    void destroy() override;
    nvinfer1::IPluginV2* clone() const override;
    void setPluginNamespace(const char* pluginNamespace) override;
    const char* getPluginNamespace() const override;

private:
    const std::string mLayerName;
    float mClipMin,mClipMax;
    size_t mInputVolume;
    std::string mNamespace;
};
class ClipPluginCreator : public nvinfer1::IPluginCreator
{
public:
    ClipPluginCreator();
    const char* getPluginName() const override;
    const char* getPluginVersion() const override;
    const nvinfer1::PluginFieldCollection* getFieldNames() override;

    nvinfer1::IPluginV2* createPlugin(const char* name, const nvinfer1::PluginFieldCollection* fc) override;
    nvinfer1::IPluginV2* deserializePlugin(const char* name, const void* serialData, size_t serialLength) override;

    void setPluginNamespace(const char* pluginNamespace) override;
    const char* getPluginNamespace() const override;

private:
    static nvinfer1::PluginFieldCollection mFC;
    static std::vector<nvinfer1::PluginField> mPluginAttributes;
    std::string mNamespace;
};
#endif
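For reference, the creator interface declared above maps directly onto the TensorRT Python API, so the plugin can also be instantiated without going through the UFF parser. A sketch, assuming libclipplugin.so has already been loaded via ctypes (the layer name "relu6" is arbitrary):

import numpy as np
import tensorrt as trt

registry = trt.get_plugin_registry()
creator = next(c for c in registry.plugin_creator_list
               if c.name == "CustomClipPlugin")

# These fields are consumed by ClipPluginCreator::createPlugin.
fc = trt.PluginFieldCollection([
    trt.PluginField("clipMin", np.array([0.0], dtype=np.float32),
                    trt.PluginFieldType.FLOAT32),
    trt.PluginField("clipMax", np.array([6.0], dtype=np.float32),
                    trt.PluginFieldType.FLOAT32),
])
plugin = creator.create_plugin("relu6", fc)  # returns an IPluginV2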

customClipPlugin.cpp

#include "customClipPlugin.h"
#include "NvInfer.h"
#include "clipKernel.h"
#include <vector>
#include <cassert>
#include <cstring>
// Constants
namespace
{
const char* CLIP_PLUGIN_VERSION{"1"};
const char* CLIP_PLUGIN_NAME{"CustomClipPlugin"};
} // namespace

// Static class fields initialization
nvinfer1::PluginFieldCollection ClipPluginCreator::mFC{};
std::vector<nvinfer1::PluginField> ClipPluginCreator::mPluginAttributes;

// Write a POD value into the serialization buffer and advance the pointer.
template <typename T>
void writeToBuffer(char*& buffer, const T& val)
{
    *reinterpret_cast<T*>(buffer) = val;
    buffer += sizeof(T);
}

// Read a POD value from the serialization buffer and advance the pointer.
template <typename T>
T readFromBuffer(const char*& buffer)
{
    T val = *reinterpret_cast<const T*>(buffer);
    buffer += sizeof(T);
    return val;
}
ClipPlugin::ClipPlugin(const std::string name, float clipMin, float clipMax)
    :mLayerName(name)
    ,mClipMin(clipMin)
    ,mClipMax(clipMax)
{

}
ClipPlugin::ClipPlugin(const std::string name, const void* data, size_t length)
    : mLayerName(name)
{
    // Deserialize in the same order the values were serialized: clipMin, then clipMax.
    const char* d = static_cast<const char*>(data);
    const char* a = d;
    mClipMin = readFromBuffer<float>(d);
    mClipMax = readFromBuffer<float>(d);
    assert(d == (a + length));
}
const char* ClipPlugin::getPluginType() const
{
    return CLIP_PLUGIN_NAME;
}
const char* ClipPlugin::getPluginVersion() const
{
    return CLIP_PLUGIN_VERSION;
}
int ClipPlugin::getNbOutputs() const
{
    return 1;
}
nvinfer1::Dims ClipPlugin::getOutputDimensions(int index, const nvinfer1::Dims* inputs, int nbInputDims)
{
    assert(nbInputDims == 1);
    assert(index ==0);
    return *inputs;
}
int ClipPlugin::initialize()
{
    return 0;
}
int ClipPlugin::enqueue(int batchSize, const void* const* inputs, void** outputs, void* workspace, cudaStream_t stream)
{
    // mInputVolume is the per-sample element count cached in configureWithFormat().
    void* output = outputs[0];
    int status = clipInference(stream, mInputVolume * batchSize, mClipMin, mClipMax, inputs[0], output);
    return status;
}
size_t ClipPlugin::getSerializationSize() const
{
    return 2*sizeof (float);
}
void ClipPlugin::serialize(void *buffer) const
{
    char *d = static_cast<char*>(buffer);
    const char* a= d;
    writeToBuffer(d, mClipMin);
    writeToBuffer(d, mClipMax);
    assert(d== a+ getSerializationSize());
}
void ClipPlugin::configureWithFormat(const nvinfer1::Dims* inputs, int nbInputs, const nvinfer1::Dims* outputDims, int nbOutputs, nvinfer1::DataType type, nvinfer1::PluginFormat format, int maxBatchSize)
{
    assert(nbOutputs == 1);
    assert(type == nvinfer1::DataType::kFLOAT);
    assert(format == nvinfer1::PluginFormat::kNCHW);

    // Cache the number of elements per sample for use in enqueue().
    size_t volume = 1;
    for (int i = 0; i < inputs->nbDims; i++)
    {
        volume *= inputs->d[i];
    }
    mInputVolume = volume;
}
bool ClipPlugin::supportsFormat(nvinfer1::DataType type, nvinfer1::PluginFormat format) const
{
    // Only FP32 in NCHW layout is supported.
    return type == nvinfer1::DataType::kFLOAT && format == nvinfer1::PluginFormat::kNCHW;
}
void ClipPlugin::terminate()
{

}
void ClipPlugin::destroy()
{
    delete this;
}
nvinfer1::IPluginV2* ClipPlugin::clone() const
{
    auto plugin = new ClipPlugin(mLayerName, mClipMin, mClipMax);
    plugin->setPluginNamespace(mNamespace.c_str());
    return plugin;
}
void ClipPlugin::setPluginNamespace(const char *pluginNamespace)
{
    mNamespace=pluginNamespace;
}
const char* ClipPlugin::getPluginNamespace() const
{
    return mNamespace.c_str();
}
ClipPluginCreator::ClipPluginCreator()
{
    //describe clipPlugin's required pluginfield arguments
    mPluginAttributes.emplace_back(nvinfer1::PluginField("clipMin", nullptr,nvinfer1::PluginFieldType::kFLOAT32,1));
    mPluginAttributes.emplace_back(nvinfer1::PluginField("clipMax", nullptr,nvinfer1::PluginFieldType::kFLOAT32,1));
    //Fill pluginfieldcollection with pluginfield arguments metadata
    mFC.nbFields = mPluginAttributes.size();
    mFC.fields = mPluginAttributes.data();
}
const char* ClipPluginCreator::getPluginName() const
{
    return CLIP_PLUGIN_NAME;
}
const char* ClipPluginCreator::getPluginVersion() const
{
    return CLIP_PLUGIN_VERSION;
}
const nvinfer1::PluginFieldCollection* ClipPluginCreator::getFieldNames()
{
    return &mFC;
}
nvinfer1::IPluginV2* ClipPluginCreator::createPlugin(const char* name, const nvinfer1::PluginFieldCollection* fc)
{
    float clipMin = 0.0f, clipMax = 0.0f;
    const nvinfer1::PluginField* fields = fc->fields;
    assert(fc->nbFields == 2);
    for (int i = 0; i < fc->nbFields; i++)
    {
        if (strcmp(fields[i].name, "clipMin") == 0)
        {
            assert(fields[i].type == nvinfer1::PluginFieldType::kFLOAT32);
            clipMin = *(static_cast<const float*>(fields[i].data));
        }
        else if (strcmp(fields[i].name, "clipMax") == 0)
        {
            assert(fields[i].type == nvinfer1::PluginFieldType::kFLOAT32);
            clipMax = *(static_cast<const float*>(fields[i].data));
        }
    }
    return new ClipPlugin(name, clipMin, clipMax);
}
nvinfer1::IPluginV2* ClipPluginCreator::deserializePlugin(const char* name, const void* serialData, size_t serialLength)
{
    return new ClipPlugin(name,serialData,serialLength);
}
void ClipPluginCreator::setPluginNamespace(const char *pluginNamespace)
{
    mNamespace = pluginNamespace;
}
const char* ClipPluginCreator::getPluginNamespace() const
{
    return mNamespace.c_str();
}

// Register the creator with TensorRT's global PluginRegistry so the plugin is
// discoverable as soon as the shared library is loaded (e.g. via ctypes.CDLL).
REGISTER_TENSORRT_PLUGIN(ClipPluginCreator);
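serialize() and the deserializing constructor agree on a very simple wire format: two native float32 values, clipMin followed by clipMax. For illustration, the same 8-byte layout reproduced in Python (assuming a little-endian host, which is what "<ff" encodes):

import struct

# Mirror of ClipPlugin::serialize(): clipMin followed by clipMax.
blob = struct.pack("<ff", 0.0, 6.0)
assert len(blob) == 8  # matches getSerializationSize() == 2 * sizeof(float)

clip_min, clip_max = struct.unpack("<ff", blob)
print(clip_min, clip_max)  # 0.0 6.0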

lenet5.py

import tensorflow as tf
import numpy as np
import os
from tensorflow.python.framework.convert_to_constants import convert_variables_to_constants_v2

WORKING_DIR = './'

MODEL_DIR = './models'

def load_data():
    mnist = tf.keras.datasets.mnist
    (x_train, y_train), (x_test, y_test) = mnist.load_data()
    x_train, x_test = x_train / 255.0, x_test / 255.0
    x_train = np.reshape(x_train,(-1,1,28,28))
    x_test = np.reshape(x_test,(-1,1,28,28))
    return x_train, y_train, x_test, y_test
def build_model():
    model = tf.keras.models.Sequential()
    model.add(tf.keras.layers.InputLayer(input_shape=[1,28,28],name="InputLayer"))
    model.add(tf.keras.layers.Flatten())
    model.add(tf.keras.layers.Dense(512))
    model.add(tf.keras.layers.Activation(activation=tf.nn.relu6, name="ReLU6"))
    model.add(tf.keras.layers.Dense(10, activation=tf.nn.softmax, name="OutputLayer"))
    return model
def train_model():
    model = build_model()
    model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])
    x_train, y_train, x_test, y_test = load_data()
    model.fit(x_train,y_train,epochs=2,verbose=1)
    test_loss, test_acc = model.evaluate(x_test, y_test)
    print("Test loss: {} \n Test accuracy: {}".format(test_loss, test_acc))
    return model
def maybe_mkdir(dir_path):
    if not os.path.exists(dir_path):
        os.makedirs(dir_path)
USE_TF1_FREEZING = False  # legacy TF1-style freezing path, kept for reference

def save_model(model):
    if USE_TF1_FREEZING:
        # TF1-style freezing requires a session, so it does not apply to the
        # TF 2.x eager workflow used above.
        output_names = model.output.op.name
        sess = tf.compat.v1.keras.backend.get_session()
        graphdef = sess.graph.as_graph_def()
        frozen_graph = tf.graph_util.convert_variables_to_constants(sess, graphdef, [output_names])
        frozen_graph = tf.graph_util.remove_training_nodes(frozen_graph)
        maybe_mkdir(MODEL_DIR)
        model_path = os.path.join(MODEL_DIR, "trained_lenet5.pb")
        with open(model_path, "wb") as ofile:
            ofile.write(frozen_graph.SerializeToString())
    else:
        # Save the model in SavedModel format as well.
        tf.saved_model.save(obj=model, export_dir=MODEL_DIR)
        # Convert the Keras model to a ConcreteFunction.
        # Note: "Input" is the input-layer name we chose ourselves; the lambda
        # argument name becomes the placeholder name in the frozen graph.
        full_model = tf.function(lambda Input: model(Input))
        full_model = full_model.get_concrete_function(
            tf.TensorSpec(model.inputs[0].shape, model.inputs[0].dtype))

        # Get frozen ConcreteFunction
        frozen_func = convert_variables_to_constants_v2(full_model)
        frozen_func.graph.as_graph_def()

        layers = [op.name for op in frozen_func.graph.get_operations()]
        print("-" * 50)
        print("Frozen model layers: ")
        for layer in layers:
            print(layer)

        print("-" * 50)
        print("Frozen model inputs: ")
        print(frozen_func.inputs)
        print("Frozen model outputs: ")
        print(frozen_func.outputs)

        # Save frozen graph from frozen ConcreteFunction to hard drive
        tf.io.write_graph(graph_or_graph_def=frozen_func.graph,
                          logdir="./frozen_models",
                          name="mnist.pb",
                          as_text=False)

if __name__ == "__main__":
    model = train_model()
    save_model(model)
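sample.py below hard-codes the graph node names "Input" and "sequential/OutputLayer/Softmax". If UFF parsing fails, dumping the frozen graph's actual node names is the quickest check; a minimal sketch:

import tensorflow as tf

graph_def = tf.compat.v1.GraphDef()
with open("./frozen_models/mnist.pb", "rb") as f:
    graph_def.ParseFromString(f.read())

# Print every node so the input/output names used by the UFF parser can be verified.
for node in graph_def.node:
    print(node.op, node.name)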


sample.py

import sys
import os
import ctypes
from random import randint
from PIL import Image
import numpy as np
import tensorflow as tf
import pycuda.driver as cuda
import pycuda.autoinit
import tensorrt as trt
import graphsurgeon as gs
import uff
# Make the TensorRT samples' common.py importable from the parent directory.
sys.path.insert(1,
                os.path.join(
                    os.path.dirname(os.path.realpath(__file__)),
                    os.pardir
                )
)
import common
import lenet5
MNIST_IMAGE_SIZE = 28
MNIST_CHANNELS = 1
MNIST_CLASSES = 10
WORKING_DIR = './'
CLIP_PLUGIN_LIBRARY = os.path.join(
    WORKING_DIR,
    'build/libclipplugin.so'
)
MODEL_PATH = os.path.join(
    WORKING_DIR,
    './frozen_models/mnist.pb'
)
TRT_LOGGER = trt.Logger(trt.Logger.WARNING)
class ModelData(object):
    INPUT_NAME = "Input"
    INPUT_SHAPE = (MNIST_CHANNELS, MNIST_IMAGE_SIZE, MNIST_IMAGE_SIZE)
    RELU6_NAME = "ReLU6"
    OUTPUT_NAME = "sequential/OutputLayer/Softmax"
    OUTPUT_SHAPE = (MNIST_CLASSES,)
    DATA_TYPE = trt.float32
def prepare_namespace_plugin_map():
    # Map the graph's ReLU6 node onto CustomClipPlugin: ReLU6(x) == clip(x, 0, 6).
    trt_relu6 = gs.create_plugin_node(name="trt_relu6", op="CustomClipPlugin", clipMin=0.0, clipMax=6.0)
    namespace_plugin_map = {
        ModelData.RELU6_NAME: trt_relu6
    }
    return namespace_plugin_map
def model_path_to_uff_path(model_path):
    uff_path = os.path.splitext(model_path)[0] + ".uff"
    return uff_path
def model_to_uff(model_path):
    dynamic_graph = gs.DynamicGraph(model_path)
    dynamic_graph.collapse_namespaces(prepare_namespace_plugin_map())
    output_uff_path = model_path_to_uff_path(model_path)
    uff.from_tensorflow(
        dynamic_graph.as_graph_def(),
        [ModelData.OUTPUT_NAME],
        output_filename=output_uff_path,
        text=True
    )
    return output_uff_path
def build_engine(model_path):
    # UFF / implicit-batch workflow (deprecated in newer TensorRT releases).
    with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.UffParser() as parser:
        builder.max_workspace_size = common.GiB(1)
        uff_path = model_to_uff(model_path)
        parser.register_input(ModelData.INPUT_NAME, ModelData.INPUT_SHAPE)
        parser.register_output(ModelData.OUTPUT_NAME)
        parser.parse(uff_path, network)
        return builder.build_cuda_engine(network)
def load_normalized_test_case(pagelocked_buffer):
    # Pick a random MNIST test image and copy it into the page-locked input buffer.
    _, _, x_test, y_test = lenet5.load_data()
    num_test = len(x_test)
    case_num = randint(0, num_test - 1)
    img = x_test[case_num].ravel()
    np.copyto(pagelocked_buffer, img)
    return y_test[case_num]
def main():
    if not os.path.isfile(CLIP_PLUGIN_LIBRARY):
        raise IOError("\n{}\n{}\n{}\n".format(
            "Failed to load library ({}).".format(CLIP_PLUGIN_LIBRARY),
            "Please build the Clip sample plugin.",
            "For more information, see the included README.md"
        ))
    ctypes.CDLL(CLIP_PLUGIN_LIBRARY)

    # Load pretrained model
    if not os.path.isfile(MODEL_PATH):
        raise IOError("\n{}\n{}\n{}\n".format(
            "Failed to load model file ({}).".format(MODEL_PATH),
            "Please use 'python lenet5.py' to train and save the model.",
            "For more information, see the included README.md"
        ))

    # Build an engine and retrieve the image mean from the model.
    with build_engine(MODEL_PATH) as engine:
        inputs, outputs, bindings, stream = common.allocate_buffers(engine)
        with engine.create_execution_context() as context:
            print("\n=== Testing ===")
            test_case = load_normalized_test_case(inputs[0].host)
            print("Loading Test Case: " + str(test_case))
            # The common do_inference function will return a list of outputs - we only have one in this case.
            [pred] = common.do_inference(context, bindings=bindings, inputs=inputs, outputs=outputs, stream=stream)
            print("Prediction: " + str(np.argmax(pred)))


if __name__ == "__main__":
    main()
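Note that common is NVIDIA's helper module from the TensorRT Python samples tree, not part of this post. A simplified sketch of the three helpers used above (GiB, allocate_buffers, do_inference), written against the same implicit-batch API and assuming a CUDA context already exists (e.g. via pycuda.autoinit in the caller):

import pycuda.driver as cuda
import tensorrt as trt

def GiB(val):
    return val * 1 << 30

class HostDeviceMem:
    def __init__(self, host, device):
        self.host = host      # page-locked host array
        self.device = device  # device allocation

def allocate_buffers(engine):
    # One page-locked host buffer and one device buffer per binding.
    inputs, outputs, bindings = [], [], []
    stream = cuda.Stream()
    for binding in engine:
        size = trt.volume(engine.get_binding_shape(binding)) * engine.max_batch_size
        dtype = trt.nptype(engine.get_binding_dtype(binding))
        host_mem = cuda.pagelocked_empty(size, dtype)
        device_mem = cuda.mem_alloc(host_mem.nbytes)
        bindings.append(int(device_mem))
        mem = HostDeviceMem(host_mem, device_mem)
        (inputs if engine.binding_is_input(binding) else outputs).append(mem)
    return inputs, outputs, bindings, stream

def do_inference(context, bindings, inputs, outputs, stream, batch_size=1):
    # Host -> device, execute asynchronously, device -> host, then synchronize.
    for inp in inputs:
        cuda.memcpy_htod_async(inp.device, inp.host, stream)
    context.execute_async(batch_size=batch_size, bindings=bindings,
                          stream_handle=stream.handle)
    for out in outputs:
        cuda.memcpy_dtoh_async(out.host, out.device, stream)
    stream.synchronize()
    return [out.host for out in outputs]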
