TensorRT: Specifying Reformat-Free I/O with the API

This post covers model optimization with TensorRT: setting TensorFormat options to request reformat-free I/O at FP16 and INT8 precision, using ITensor::setAllowedFormats to avoid format conversions, and using BuilderFlag::kSTRICT_TYPES to constrain the builder to a reformat-free path. It also walks through code that builds and executes a preprocessor engine and a prediction engine, explains the fallback warning, and validates the output.


The demo shows how to use the API to explicitly specify reformat-free I/O at FP16 and INT8 precision, with one of the following tensor formats:

  • TensorFormat::kLINEAR
  • TensorFormat::kCHW2
  • TensorFormat::kHWC8

Implementation:

ITensor::setAllowedFormats: declares the formats the I/O tensors are expected to support, so the formatting operations that would otherwise convert the I/O tensors from FP32 are avoided.

BuilderFlag::kSTRICT_TYPES: assigned to the builder config to make the builder choose a reformat-free path rather than simply the fastest path.

  • If no reformat-free implementation is available, the builder falls back to the fastest path (with reformatting) and emits the following warning (see the sketch after this list):
  • Warning: no implementation obeys reformatting-free rules, …
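
A minimal sketch of how these two calls fit together is shown below. It assumes a parsed INetworkDefinition and an IBuilderConfig already exist; the helper name `markReformatFreeIO` and the choice of `DataType::kHALF` with `TensorFormat::kCHW2` are illustrative, not taken from the sample:

```cpp
#include "NvInfer.h"
using namespace nvinfer1;

// Restrict the network's I/O tensors to a reformat-free FP16 path.
void markReformatFreeIO(INetworkDefinition& network, IBuilderConfig& config)
{
    ITensor* input = network.getInput(0);
    ITensor* output = network.getOutput(0);

    // Declare the I/O types explicitly; kCHW2 is an FP16-only format.
    input->setType(DataType::kHALF);
    output->setType(DataType::kHALF);

    // setAllowedFormats takes a bitmask built from TensorFormat values.
    TensorFormats formats = 1U << static_cast<uint32_t>(TensorFormat::kCHW2);
    input->setAllowedFormats(formats);
    output->setAllowedFormats(formats);

    // Enable FP16 kernels, then forbid the builder from silently inserting
    // reformatting just because a reformatted path would be faster.
    config.setFlag(BuilderFlag::kFP16);
    config.setFlag(BuilderFlag::kSTRICT_TYPES);
}
```

With kSTRICT_TYPES set, the builder only falls back to a reformatting path (and prints the warning above) when no conforming implementation exists at all.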
7.7.2 Code

The listing below is the complete sample_dynamic_reshape program:

```cpp
#include "common/BatchStream.h"
#include "common/EntropyCalibrator.h"
#include "common/argsParser.h"
#include "common/buffers.h"
#include "common/common.h"
#include "common/logger.h"
#include "common/logging.h"
#include "common/parserOnnxConfig.h"

#include "NvInfer.h"
#include <cuda_runtime_api.h>

#include <algorithm>
#include <cmath>
#include <fstream>
#include <iomanip>
#include <random>

using namespace nvinfer1;

const std::string gSampleName = "TensorRT.sample_dynamic_reshape";

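//! Implements the dynamic-reshape MNIST sample: a preprocessor engine resizes
//! arbitrarily sized input to the shape expected by the prediction engine,
//! which is parsed from an ONNX MNIST model.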
class SamplesDynamicReshape
{
    template <typename T>
    using SampleUniquePtr = std::unique_ptr<T,samplesCommon::InferDeleter>;
public:
    SamplesDynamicReshape(const samplesCommon::OnnxSampleParams& params)
        :mParams(params)
    {

    }
    bool build();
    bool prepare();
    bool infer();
private:
    bool buildPreprocessorEngine(const SampleUniquePtr<nvinfer1::IBuilder>& builder);
    bool buildPredictionEngine(const SampleUniquePtr<nvinfer1::IBuilder>& builder);

    Dims loadPGMFile(const std::string& fileName);
    bool validateOutput(int digit);

    samplesCommon::OnnxSampleParams mParams;

    nvinfer1::Dims mPredictionInputDims;
    nvinfer1::Dims mPredictionOutputDims;

    SampleUniquePtr<nvinfer1::ICudaEngine> mPreprocessorEngine{nullptr},mPredictionEngine{nullptr};

    SampleUniquePtr<nvinfer1::IExecutionContext> mPreprocessorContext{nullptr},mPredictionContext{nullptr};

    samplesCommon::ManagedBuffer mInput{};
    samplesCommon::DeviceBuffer mPredictionInput{};

    samplesCommon::ManagedBuffer mOutput{};

    template <typename T>
    SampleUniquePtr<T> makeUnique(T* t)
    {
        return SampleUniquePtr<T>{t};
    }

};
bool SamplesDynamicReshape::build()
{
    auto builder = makeUnique(nvinfer1::createInferBuilder(sample::gLogger.getTRTLogger()));
    if(!builder)
    {
        sample::gLogError <<"Create inference builder failed."<<std::endl;
        return false;
    }
    // Build the prediction engine first: the preprocessor engine needs
    // mPredictionInputDims, which buildPredictionEngine fills in.
    return buildPredictionEngine(builder) && buildPreprocessorEngine(builder);
}
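//! Builds the engine that resizes a dynamically shaped input to the prediction
//! engine's input shape. The input uses dims {-1, 1, -1, -1}, so batch, height,
//! and width are all runtime dimensions.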
bool SamplesDynamicReshape::buildPreprocessorEngine(const SampleUniquePtr<nvinfer1::IBuilder> &builder)
{
    auto preprocessorNetwork = makeUnique(
                builder->createNetworkV2(1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH)));
    if(!preprocessorNetwork)
    {
        sample::gLogError <<"Create network failed. "<<std::endl;
        return false;
    }
    auto input = preprocessorNetwork->addInput("input",nvinfer1::DataType::kFLOAT,Dims4{-1,1,-1,-1});
    auto resizeLayer = preprocessorNetwork->addResize(*input);
    resizeLayer->setOutputDimensions(mPredictionInputDims);
    preprocessorNetwork->markOutput(*resizeLayer->getOutput(0));

    auto preprocessorConfig = makeUnique(builder->createBuilderConfig());
    if(!preprocessorConfig)
    {
        sample::gLogError<<"Create builder config failed."<<std::endl;
        return false;
    }
    auto profile = builder->createOptimizationProfile();
    profile->setDimensions(input->getName(),OptProfileSelector::kMIN,Dims4{1,1,1,1});
    profile->setDimensions(input->getName(),OptProfileSelector::kOPT,Dims4{1,1,28,28});
    profile->setDimensions(input->getName(),OptProfileSelector::kMAX,Dims4{1,1,56,56});
    preprocessorConfig->addOptimizationProfile(profile);

    auto profileCalib = builder->createOptimizationProfile();
    const int calibBatchSize{256};

    profileCalib->setDimensions(input->getName(),OptProfileSelector::kMIN,Dims4{calibBatchSize,1,28,28});
    profileCalib->setDimensions(input->getName(),OptProfileSelector::kOPT,Dims4{calibBatchSize,1,28,28});
    profileCalib->setDimensions(input->getName(),OptProfileSelector::kMAX,Dims4{calibBatchSize,1,28,28});
    preprocessorConfig->setCalibrationProfile(profileCalib);

    std::unique_ptr<IInt8Calibrator> calibrator;
    if(mParams.int8)
    {
        preprocessorConfig->setFlag(BuilderFlag::kINT8);
        const int nCalibBatches{10};
        MNISTBatchStream calibrationStream(
            calibBatchSize, nCalibBatches, "train-images-idx3-ubyte", "train-labels-idx1-ubyte", mParams.dataDirs);
        calibrator.reset(
            new Int8EntropyCalibrator2<MNISTBatchStream>(calibrationStream, 0, "MNISTPreprocessor", "input"));
        preprocessorConfig->setInt8Calibrator(calibrator.get());
    }
    mPreprocessorEngine = makeUnique(builder->buildEngineWithConfig(*preprocessorNetwork,*preprocessorConfig));
    if(!mPreprocessorEngine)
    {
        sample::gLogError << "Preprocessor engine build failed."<<std::endl;
        return false;
    }
    sample::gLogInfo << "Profile dimensions in preprocessor engine:: "<<std::endl;
    sample::gLogInfo <<"   Mninmum = "<<mPreprocessorEngine->getProfileDimensions(0,0,OptProfileSelector::kMIN)
                    <<std::endl;
    sample::gLogInfo <<"   Optimum = "<<mPreprocessorEngine->getProfileDimensions(0,0,OptProfileSelector::kOPT)
                    <<std::endl;
    sample::gLogInfo <<"   Maximum =  "<<mPreprocessorEngine->getProfileDimensions(0,0,OptProfileSelector::kMAX)
                    <<std::endl;
    return true;
}
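//! Parses the MNIST ONNX model, replaces its output with a softmax layer, and
//! builds the prediction engine. Also records the engine's input/output dims
//! for the preprocessor engine and the I/O buffers.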
bool SamplesDynamicReshape::buildPredictionEngine(const SampleUniquePtr<nvinfer1::IBuilder>& builder)
{
    const auto explicitBatch = 1U << static_cast<uint32_t>(NetworkDefinitionCreationFlag::kEXPLICIT_BATCH);
    auto network = makeUnique(builder->createNetworkV2(explicitBatch));
    if(!network)
    {
        sample::gLogError<<"Creat network failed."<<std::endl;
        return false;
    }
    auto parser = samplesCommon::infer_object(nvonnxparser::createParser(*network,sample::gLogger.getTRTLogger()));
    bool parsingSuccess = parser->parseFromFile(locateFile(mParams.onnxFileName,mParams.dataDirs).c_str(),
                                                static_cast<int>(sample::gLogger.getReportableSeverity()));
    if(!parsingSuccess)
    {
        sample::gLogError<<"Failed to parse model."<<std::endl;
        return false;
    }

    // Append a softmax over the class axis (axis 1, hence the 1 << 1 bitmask)
    // and make it the network's sole output.
    auto softmax = network->addSoftMax(*network->getOutput(0));
    softmax->setAxes(1 << 1);
    network->unmarkOutput(*network->getOutput(0));
    network->markOutput(*softmax->getOutput(0));

    mPredictionInputDims = network->getInput(0)->getDimensions();
    mPredictionOutputDims = network->getOutput(0)->getDimensions();

    auto config = makeUnique(builder->createBuilderConfig());
    if(!config)
    {
        sample::gLogError<<"Create builder config failed."<<std::endl;
        return false;
    }
    config->setMaxWorkspaceSize(16_MiB);
    if(mParams.fp16)
    {
        config->setFlag(BuilderFlag::kFP16);
    }
    auto profileCalib = builder->createOptimizationProfile();
    const auto inputName = mParams.inputTensorNames[0].c_str();
    const int calibBatchSize{1};
    profileCalib->setDimensions(inputName, OptProfileSelector::kMIN, Dims4{calibBatchSize, 1, 28, 28});
    profileCalib->setDimensions(inputName, OptProfileSelector::kOPT, Dims4{calibBatchSize, 1, 28, 28});
    profileCalib->setDimensions(inputName, OptProfileSelector::kMAX, Dims4{calibBatchSize, 1, 28, 28});
    config->setCalibrationProfile(profileCalib);

    std::unique_ptr<IInt8Calibrator> calibrator;
    if (mParams.int8)
    {
        config->setFlag(BuilderFlag::kINT8);
        int nCalibBatches{10};
        MNISTBatchStream calibrationStream(
            calibBatchSize, nCalibBatches, "train-images-idx3-ubyte", "train-labels-idx1-ubyte", mParams.dataDirs);
        calibrator.reset(
            new Int8EntropyCalibrator2<MNISTBatchStream>(calibrationStream, 0, "MNISTPrediction", inputName));
        config->setInt8Calibrator(calibrator.get());
    }

    mPredictionEngine = makeUnique(builder->buildEngineWithConfig(*network, *config));
    if (!mPredictionEngine)
    {
        sample::gLogError << "Prediction engine build failed." << std::endl;
        return false;
    }
    return true;

}
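//! Creates execution contexts for both engines and allocates the intermediate
//! and output buffers.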
bool SamplesDynamicReshape::prepare()
{
    mPreprocessorContext = makeUnique(mPreprocessorEngine->createExecutionContext());
    if(!mPreprocessorContext)
    {
        sample::gLogError<<"Preprocessor context build failed."<<std::endl;
        return false;
    }

    mPredictionContext = makeUnique(mPredictionEngine->createExecutionContext());
    if(!mPredictionContext)
    {
        sample::gLogError<<"Prediction contect build failed."<<std::endl;
        return false;
    }
    mPredictionInput.resize(mPredictionInputDims);
    mOutput.hostBuffer.resize(mPredictionOutputDims);
    mOutput.deviceBuffer.resize(mPredictionOutputDims);
    return true;

}
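//! Picks a random digit, loads the corresponding PGM image, runs the
//! preprocessor engine to resize it, then runs the prediction engine and
//! validates the result.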
bool SamplesDynamicReshape::infer()
{
    std::random_device rd{};
    std::default_random_engine generator{rd()};
    std::uniform_int_distribution<int> digitDistribution{0,9};
    int digit = digitDistribution(generator);

    Dims inputDims = loadPGMFile(locateFile(std::to_string(digit)+".pgm",mParams.dataDirs));
    mInput.deviceBuffer.resize(inputDims);
    CHECK(cudaMemcpy(
              mInput.deviceBuffer.data(),mInput.hostBuffer.data(),mInput.hostBuffer.nbBytes(),cudaMemcpyHostToDevice));
    CHECK_RETURN_W_MSG(mPreprocessorContext->setBindingDimensions(0,inputDims),false,"Invalid binding dimensions.");

    // We can only run inference once all dynamic input shapes have been specified.
    if (!mPreprocessorContext->allInputDimensionsSpecified())
    {
        return false;
    }

    // Run the preprocessor to resize the input to the correct shape
    std::vector<void*> preprocessorBindings = {mInput.deviceBuffer.data(), mPredictionInput.data()};
    // For engines using full dims, we can use executeV2, which does not include a separate batch size parameter.
    bool status = mPreprocessorContext->executeV2(preprocessorBindings.data());
    if (!status)
    {
        return  false;
    }
    std::vector<void*> predictionBindings = {mPredictionInput.data(),mOutput.deviceBuffer.data()};
    status = mPredictionContext->executeV2(predictionBindings.data());
    if(!status)
    {
        return false;
    }
    CHECK(cudaMemcpy(mOutput.hostBuffer.data(),mOutput.deviceBuffer.data(),mOutput.deviceBuffer.nbBytes(),cudaMemcpyDeviceToHost));
    return validateOutput(digit);

}
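//! Reads a PGM image into mInput.hostBuffer (normalized and inverted) and
//! returns its dimensions.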
Dims SamplesDynamicReshape::loadPGMFile(const std::string& fileName)
{
    std::ifstream infile(fileName, std::ifstream::binary);
    assert(infile.is_open() && "Attempting to read from a file that is not open.");

    // Parse the PGM header (magic string, dimensions, max pixel value), then
    // skip the single whitespace byte that precedes the pixel data.
    std::string magic;
    int h, w, max;
    infile >> magic >> h >> w >> max;
    infile.seekg(1, infile.cur);
    Dims4 inputDims{1,1,h,w};
    size_t vol = samplesCommon::volume(inputDims);
    std::vector<uint8_t> fileData(vol);
    infile.read(reinterpret_cast<char*>(fileData.data()),vol);

    sample::gLogInfo <<"Input: \n";
    for(size_t i=0;i<vol;i++)
    {
        sample::gLogInfo << (" .:-=+*#%@"[fileData[i] / 26]) << (((i + 1) % w) ? "" : "\n");
    }
    sample::gLogInfo<<std::endl;

    mInput.hostBuffer.resize(inputDims);
    float* hostDataBuffer = static_cast<float*>(mInput.hostBuffer.data());
    // Normalize pixels to [0, 1] and invert so the background maps to 0.
    std::transform(fileData.begin(), fileData.end(), hostDataBuffer,
                   [](uint8_t x) { return 1.0 - static_cast<float>(x / 255.0); });
    return inputDims;
}
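//! Prints the per-class probabilities and checks that the most probable class
//! matches the expected digit.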
bool SamplesDynamicReshape::validateOutput(int digit)
{
    const float* buffRaw = static_cast<const float*>(mOutput.hostBuffer.data());
    std::vector<float> prob(buffRaw,buffRaw+mOutput.hostBuffer.size());

    int curIndex{0};
    for(const auto&elem : prob)
    {
        sample::gLogInfo <<"Prob "<<curIndex<<"  "<<std::fixed<<std::setw(5)<<std::setprecision(4)
                        <<"  "
                       <<"Class "<<curIndex<<": "<<std::string(int(std::floor(elem*10 + 0.5f)),'*')
                      <<std::endl;
        ++curIndex;
    }
    int predictedDigit = std::max_element(prob.begin(),prob.end())-prob.begin();
    return digit == predictedDigit;
}
samplesCommon::OnnxSampleParams initializeSampleParams(const samplesCommon::Args & args)
{
    samplesCommon::OnnxSampleParams params;
    if (args.dataDirs.empty()) //!< Use default directories if user hasn't provided directory paths
    {
        params.dataDirs.push_back("data/mnist/");
        params.dataDirs.push_back("data/samples/mnist/");
    }
    else //!< Use the data directory provided by the user
    {
        params.dataDirs = args.dataDirs;
    }
    params.onnxFileName = "mnist.onnx";
    params.inputTensorNames.push_back("Input3");
    params.outputTensorNames.push_back("Plus214_Output_0");
    params.int8 = args.runInInt8;
    params.fp16 = args.runInFp16;
    return params;
}

void printHelpInfo()
{
    std::cout << "Usage: ./sample_dynamic_reshape [-h or --help] [-d or --datadir=<path to data directory>]"
              << std::endl;
    std::cout << "--help, -h      Display help information" << std::endl;
    std::cout << "--datadir       Specify path to a data directory, overriding the default. This option can be used "
                 "multiple times to add multiple directories. If no data directories are given, the default is to use "
                 "(data/samples/mnist/, data/mnist/)"
              << std::endl;
    std::cout << "--int8          Run in Int8 mode." << std::endl;
    std::cout << "--fp16          Run in FP16 mode." << std::endl;
}
int main(int argc,char** argv)
{
    samplesCommon::Args args;
    bool argsOK = samplesCommon::parseArgs(args, argc, argv);
    if (!argsOK)
    {
        sample::gLogError << "Invalid arguments" << std::endl;
        printHelpInfo();
        return EXIT_FAILURE;
    }
    if (args.help)
    {
        printHelpInfo();
        return EXIT_SUCCESS;
    }
    auto sampleTest = sample::gLogger.defineTest(gSampleName, argc, argv);

    sample::gLogger.reportTestStart(sampleTest);

    SamplesDynamicReshape sample{initializeSampleParams(args)};

    if (!sample.build())
    {
        return sample::gLogger.reportFail(sampleTest);
    }
    if (!sample.prepare())
    {
        return sample::gLogger.reportFail(sampleTest);
    }
    if (!sample.infer())
    {
        return sample::gLogger.reportFail(sampleTest);
    }

    return sample::gLogger.reportPass(sampleTest);

}
```

### Problem analysis

When `mingw32-make` reports `'No targets specified and no makefile found'` in a TensorRT-YOLO project, the current directory is missing a valid Makefile (or no target was specified). The root cause is that on Windows, CMake generates MSVC project files by default rather than Unix-style Makefiles.

---

### Solutions

#### Option 1: Generate MinGW-compatible Makefiles

1. **Install CMake**
   - Make sure a recent version of CMake is installed and added to the system PATH.

2. **Create a build directory**
   - Create a dedicated build folder (e.g. `build`) outside the source tree to avoid polluting it.
   ```bash
   mkdir build && cd build
   ```

3. **Configure CMake to generate Makefiles**
   - Pass `-G "MinGW Makefiles"` so CMake produces MinGW-compatible Makefiles:
   ```bash
   cmake .. -G "MinGW Makefiles"
   ```
   - Extra build options can be passed with `-D`. For example, to select a debug build:
   ```bash
   cmake .. -G "MinGW Makefiles" -DCMAKE_BUILD_TYPE=Debug
   ```

4. **Verify the result**
   - After the command succeeds, a new Makefile should appear in the current directory.

5. **Build with mingw32-make**
   - Start the build with:
   ```bash
   mingw32-make
   ```

#### Option 2: Write a simple Makefile by hand

If you prefer not to depend on a full toolchain manager, a basic hand-written Makefile can cover simple needs (note that TensorRT's core library links as `-lnvinfer`, not `-ltensorrt`):

```Makefile
# Minimal hand-written Makefile example
CXX := g++
LD := g++
CXXFLAGS := -Wall -std=c++17 -I../include
LDFLAGS := -L../lib -lnvinfer -lcudart

TARGET := yolo_tensorrt_example
SRCS := $(wildcard ../src/*.cpp)
OBJS := $(SRCS:.cpp=.o)

$(TARGET): $(OBJS)
	$(LD) -o $@ $^ $(LDFLAGS)

%.o: %.cpp
	$(CXX) $(CXXFLAGS) -c -o $@ $<

clean:
	rm -f $(OBJS) $(TARGET)
```

Save it as `Makefile` and build directly with `mingw32-make`.

#### Option 3: Switch to the MSVC toolchain

Although a MinGW build can be forced, for large high-performance frameworks such as TensorRT and YOLO the official documentation strongly recommends Microsoft's Visual Studio compiler (MSVC), mainly because it integrates better with the NVIDIA CUDA SDK and makes fuller use of hardware acceleration.

1. **Install Visual Studio**
   - Install Visual Studio Community (or higher) with the desktop development workload.

2. **Adjust the CMake invocation**
   - Point the earlier CMake command at the Visual Studio generator instead:
   ```bash
   cmake .. -G "Visual Studio 16 2019"
   ```

3. **Open the solution file (.sln)**
   - CMake produces a `.sln` project description that can be loaded directly into the IDE.

---

### Notes

- **Toolchain consistency**: whichever toolchain you choose (MinGW or MSVC), keep it consistent across the whole development cycle to avoid unpredictable behavior.
- **Keep dependencies in sync**: third-party libraries evolve, adding new APIs and removing old ones, so check component compatibility regularly.
- **Clean stale artifacts**: before switching generator types, clear previously generated intermediate files so they don't interfere with the build.

---

### Example

A typical CMakeLists.txt that drives the Makefile generation above (this assumes a `FindTensorRT.cmake` module is on the CMake module path, since TensorRT does not ship an official CMake package config):

```cmake
# Example CMakeLists.txt
cmake_minimum_required(VERSION 3.10)
project(TensorRT_YOLO LANGUAGES CXX)

set(CMAKE_CXX_STANDARD 17)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wall")

add_executable(tensorrt_yolo main.cpp util.cpp)
find_package(TensorRT REQUIRED)
target_link_libraries(tensorrt_yolo PRIVATE ${TENSORRT_LIBRARIES})
```

Then follow the steps above to resolve the problem.