tensorrt 安装和示例程序跑通记录

该文描述了在Ubuntu18.04系统上安装TensorRT8.4.1.5和CUDA11.2的步骤,包括下载、解压、创建CMakeLists.txt、设置环境变量、编译和运行sampleMNIST示例。在编译过程中遇到一些关于过时函数的警告,但最终成功运行并显示了输出结果。
摘要由CSDN通过智能技术生成

本机环境:

ubuntu1804 cuda11.2

最终环境配置:cuda11.2,TensorRT8.4.1.5,cudnn8.2.4

1.下载TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz

https://developer.nvidia.com/nvidia-tensorrt-8x-download

2.tar安装
tar -xzvf TensorRT-8.4.1.5.Linux.x86_64-gnu.cuda-11.6.cudnn8.4.tar.gz

3.进入TensorRT-8.4.1.5/samples/sampleMNIST目录
cd TensorRT-8.4.1.5/samples/sampleMNIST

4.创建CMakeLists.txt
需要设置和本机实际对应的TensorRT安装路径
# Minimal CMake project for building the TensorRT sampleMNIST example
# outside of NVIDIA's Makefile-based sample build system.
cmake_minimum_required(VERSION 3.13)
project(TensorRT_test)
# sampleMNIST only needs C++11.
set(CMAKE_CXX_STANDARD 11)
 
# logger.cpp from samples/common must be compiled into the target because
# the sample sources reference the shared sample logger defined there.
# NOTE(review): absolute path — adjust to your local TensorRT install.
set(SAMPLES_COMMON_SOURCES "/home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/logger.cpp")
add_executable(TensorRT_test sampleMNIST.cpp ${SAMPLES_COMMON_SOURCES})
 
# add TensorRT8: public API headers and the samples' shared helper headers.
include_directories(/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include)
include_directories(/home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common)
set(TENSORRT_LIB_PATH "/home/anktech/Storage/Meng/TensorRT-8.4.1.5/lib")
# Collect every TensorRT shared library in the lib directory for linking.
file(GLOB LIBS "${TENSORRT_LIB_PATH}/*.so")
 
# add CUDA (the FindCUDA module is deprecated in newer CMake releases,
# but is the straightforward choice with cmake_minimum_required 3.13).
find_package(CUDA REQUIRED)
message("CUDA_LIBRARIES:${CUDA_LIBRARIES}")
message("CUDA_INCLUDE_DIRS:${CUDA_INCLUDE_DIRS}")
include_directories(${CUDA_INCLUDE_DIRS})




# link the TensorRT shared libraries and the CUDA runtime into the binary.
target_link_libraries(TensorRT_test ${LIBS} ${CUDA_LIBRARIES})
5.添加环境变量(根据实际路径,也要把cudnn的库目录添加进去)
export LD_LIBRARY_PATH=/home/anktech/Storage/Meng/TensorRT-8.4.1.5/lib:$LD_LIBRARY_PATH
export LD_LIBRARY_PATH=/media/anktech/10E09BBBE09BA58E/3rd/cudnn-11.4-linux-x64-v8.2.4.15/lib64:$LD_LIBRARY_PATH

*这里cudnn需要下载并解压到你自己的目录

6.编译和运行
mkdir build && cd build
cmake ..
make
 
./TensorRT_test

 *cmake log

-- The C compiler identification is GNU 7.5.0
-- The CXX compiler identification is GNU 7.5.0
-- Check for working C compiler: /usr/bin/cc
-- Check for working C compiler: /usr/bin/cc - works
-- Detecting C compiler ABI info
-- Detecting C compiler ABI info - done
-- Detecting C compile features
-- Detecting C compile features - done
-- Check for working CXX compiler: /usr/bin/c++
-- Check for working CXX compiler: /usr/bin/c++ - works
-- Detecting CXX compiler ABI info
-- Detecting CXX compiler ABI info - done
-- Detecting CXX compile features
-- Detecting CXX compile features - done
-- Looking for pthread.h
-- Looking for pthread.h - found
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD
-- Performing Test CMAKE_HAVE_LIBC_PTHREAD - Failed
-- Looking for pthread_create in pthreads
-- Looking for pthread_create in pthreads - not found
-- Looking for pthread_create in pthread
-- Looking for pthread_create in pthread - found
-- Found Threads: TRUE  
-- Found CUDA: /usr/local/cuda-11.2 (found version "11.2") 
CUDA_LIBRARIES:/usr/local/cuda-11.2/lib64/libcudart_static.a;Threads::Threads;dl;/usr/lib/x86_64-linux-gnu/librt.so
CUDA_INCLUDE_DIRS:/usr/local/cuda-11.2/include
-- Configuring done
-- Generating done
-- Build files have been written to: /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/sampleMNIST/build

*make log

Scanning dependencies of target TensorRT_test
[ 33%] Building CXX object CMakeFiles/TensorRT_test.dir/sampleMNIST.cpp.o
In file included from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInferLegacyDims.h:16:0,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h:16,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/buffers.h:20,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/sampleMNIST/sampleMNIST.cpp:27:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInferRuntimeCommon.h: In member function ‘virtual bool nvinfer1::IGpuAllocator::deallocate(void*)’:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInferRuntimeCommon.h:1450:26: warning: ‘virtual void nvinfer1::IGpuAllocator::free(void*)’ is deprecated [-Wdeprecated-declarations]
         this->free(memory);
                          ^
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInferRuntimeCommon.h:1381:33: note: declared here
     TRT_DEPRECATED virtual void free(void* const memory) noexcept = 0;
                                 ^~~~
In file included from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/buffers.h:20:0,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/sampleMNIST/sampleMNIST.cpp:27:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h: At global scope:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h:6258:88: warning: ‘IFullyConnectedLayer’ is deprecated [-Wdeprecated-declarations]
         ITensor& input, int32_t nbOutputs, Weights kernelWeights, Weights biasWeights) noexcept
                                                                                        ^~~~~~~~
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h:1443:22: note: declared here
 class TRT_DEPRECATED IFullyConnectedLayer : public ILayer
                      ^~~~~~~~~~~~~~~~~~~~
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h:6800:101: warning: ‘IRNNv2Layer’ is deprecated [-Wdeprecated-declarations]
         ITensor& input, int32_t layerCount, int32_t hiddenSize, int32_t maxSeqLen, RNNOperation op) noexcept
                                                                                                     ^~~~~~~~
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h:3281:22: note: declared here
 class TRT_DEPRECATED IRNNv2Layer : public ILayer
                      ^~~~~~~~~~~
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/sampleMNIST/sampleMNIST.cpp: In member function ‘bool SampleMNIST::build()’:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/sampleMNIST/sampleMNIST.cpp:142:47: warning: ‘void nvinfer1::IBuilder::setMaxBatchSize(int32_t)’ is deprecated [-Wdeprecated-declarations]
     builder->setMaxBatchSize(mParams.batchSize);
                                               ^
In file included from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/buffers.h:20:0,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/sampleMNIST/sampleMNIST.cpp:27:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h:8795:25: note: declared here
     TRT_DEPRECATED void setMaxBatchSize(int32_t batchSize) noexcept
                         ^~~~~~~~~~~~~~~
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/sampleMNIST/sampleMNIST.cpp: In member function ‘bool SampleMNIST::infer()’:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/sampleMNIST/sampleMNIST.cpp:339:97: warning: ‘bool nvinfer1::IExecutionContext::enqueue(int32_t, void* const*, cudaStream_t, CUevent_st**)’ is deprecated [-Wdeprecated-declarations]
     if (!context->enqueue(mParams.batchSize, buffers.getDeviceBindings().data(), stream, nullptr))
                                                                                                 ^
In file included from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h:17:0,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/buffers.h:20,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/sampleMNIST/sampleMNIST.cpp:27:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInferRuntime.h:1948:25: note: declared here
     TRT_DEPRECATED bool enqueue(
                         ^~~~~~~
[ 66%] Building CXX object CMakeFiles/TensorRT_test.dir/home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/logger.cpp.o
In file included from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/logging.h:21:0,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/logger.h:21,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/logger.cpp:18:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInferRuntimeCommon.h: In member function ‘virtual bool nvinfer1::IGpuAllocator::deallocate(void*)’:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInferRuntimeCommon.h:1450:26: warning: ‘virtual void nvinfer1::IGpuAllocator::free(void*)’ is deprecated [-Wdeprecated-declarations]
         this->free(memory);
                          ^
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInferRuntimeCommon.h:1381:33: note: declared here
     TRT_DEPRECATED virtual void free(void* const memory) noexcept = 0;
                                 ^~~~
In file included from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/sampleOptions.h:30:0,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/logging.h:22,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/logger.h:21,
                 from /home/anktech/Storage/Meng/TensorRT-8.4.1.5/samples/common/logger.cpp:18:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h: At global scope:
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h:6258:88: warning: ‘IFullyConnectedLayer’ is deprecated [-Wdeprecated-declarations]
         ITensor& input, int32_t nbOutputs, Weights kernelWeights, Weights biasWeights) noexcept
                                                                                        ^~~~~~~~
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h:1443:22: note: declared here
 class TRT_DEPRECATED IFullyConnectedLayer : public ILayer
                      ^~~~~~~~~~~~~~~~~~~~
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h:6800:101: warning: ‘IRNNv2Layer’ is deprecated [-Wdeprecated-declarations]
         ITensor& input, int32_t layerCount, int32_t hiddenSize, int32_t maxSeqLen, RNNOperation op) noexcept
                                                                                                     ^~~~~~~~
/home/anktech/Storage/Meng/TensorRT-8.4.1.5/include/NvInfer.h:3281:22: note: declared here
 class TRT_DEPRECATED IRNNv2Layer : public ILayer
                      ^~~~~~~~~~~
[100%] Linking CXX executable TensorRT_test
[100%] Built target TensorRT_test

*运行结果

&&&& RUNNING TensorRT.sample_mnist [TensorRT v8401] # ./TensorRT_test
[07/01/2023-17:27:41] [I] Building and running a GPU inference engine for MNIST
[07/01/2023-17:27:42] [I] [TRT] [MemUsageChange] Init CUDA: CPU +314, GPU +0, now: CPU 320, GPU 844 (MiB)
[07/01/2023-17:27:42] [I] [TRT] [MemUsageChange] Init builder kernel library: CPU +207, GPU +68, now: CPU 544, GPU 912 (MiB)
[07/01/2023-17:27:42] [W] [TRT] The implicit batch dimension mode has been deprecated. Please create the network with NetworkDefinitionCreationFlag::kEXPLICIT_BATCH flag whenever possible.
[07/01/2023-17:27:43] [I] [TRT] [MemUsageChange] Init cuBLAS/cuBLASLt: CPU +398, GPU +166, now: CPU 944, GPU 1078 (MiB)
[07/01/2023-17:27:43] [I] [TRT] [MemUsageChange] Init cuDNN: CPU +483, GPU +204, now: CPU 1427, GPU 1282 (MiB)
[07/01/2023-17:27:43] [W] [TRT] TensorRT was linked against cuDNN 8.4.1 but loaded cuDNN 8.2.4
[07/01/2023-17:27:43] [I] [TRT] Local timing cache in use. Profiling results in this builder pass will not be stored.
[07/01/2023-17:27:47] [I] [TRT] Detected 1 inputs and 1 output network tensors.
[07/01/2023-17:27:47] [I] [TRT] Total Host Persistent Memory: 8832
[07/01/2023-17:27:47] [I] [TRT] Total Device Persistent Memory: 0
[07/01/2023-17:27:47] [I] [TRT] Total Scratch Memory: 0
[07/01/2023-17:27:47] [I] [TRT] [MemUsageStats] Peak memory usage of TRT CPU/GPU memory allocators: CPU 1 MiB, GPU 884 MiB
[07/01/2023-17:27:47] [I] [TRT] [BlockAssignment] Algorithm ShiftNTopDown took 0.040401ms to assign 3 blocks to 11 nodes requiring 57860 bytes.
[07/01/2023-17:27:47] [I] [TRT] Total Activation Memory: 57860
[07/01/2023-17:27:47] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in building engine: CPU +0, GPU +4, now: CPU 0, GPU 4 (MiB)
[07/01/2023-17:27:47] [I] [TRT] [MemUsageChange] Init CUDA: CPU +0, GPU +0, now: CPU 1832, GPU 1425 (MiB)
[07/01/2023-17:27:47] [I] [TRT] Loaded engine size: 1 MiB
[07/01/2023-17:27:47] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in engine deserialization: CPU +0, GPU +1, now: CPU 0, GPU 1 (MiB)
[07/01/2023-17:27:48] [I] [TRT] [MemUsageChange] TensorRT-managed allocation in IExecutionContext creation: CPU +0, GPU +0, now: CPU 0, GPU 1 (MiB)
[07/01/2023-17:27:48] [I] Input:
@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@%=#@@@@@%=%@@@@@@@@@@
@@@@@@@           %@@@@@@@@@
@@@@@@@           %@@@@@@@@@
@@@@@@@#:-#-.     %@@@@@@@@@
@@@@@@@@@@@@#    #@@@@@@@@@@
@@@@@@@@@@@@@    #@@@@@@@@@@
@@@@@@@@@@@@@:  :@@@@@@@@@@@
@@@@@@@@@%+==   *%%%%%%%%%@@
@@@@@@@@%                 -@
@@@@@@@@@#+.          .:-%@@
@@@@@@@@@@@*     :-###@@@@@@
@@@@@@@@@@@*   -%@@@@@@@@@@@
@@@@@@@@@@@*   *@@@@@@@@@@@@
@@@@@@@@@@@*   @@@@@@@@@@@@@
@@@@@@@@@@@*   #@@@@@@@@@@@@
@@@@@@@@@@@*   *@@@@@@@@@@@@
@@@@@@@@@@@*   *@@@@@@@@@@@@
@@@@@@@@@@@*   @@@@@@@@@@@@@
@@@@@@@@@@@*   @@@@@@@@@@@@@
@@@@@@@@@@@@+=#@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@

[07/01/2023-17:27:48] [I] Output:
0: 
1: 
2: 
3: 
4: 
5: 
6: 
7: **********
8: 
9: 

&&&& PASSED TensorRT.sample_mnist [TensorRT v8401] # ./TensorRT_test

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
好的,下面是一个简单的PID算法STC32示例程序,实现了控制一个直流电机的速度:

```c
#include "STC32.h"

// PID参数
float KP = 1.0;
float KI = 0.1;
float KD = 0.01;

// 采样周期
float T = 0.01;

// 目标速度
float target_speed = 100;

// 电机速度
float motor_speed = 0;

// PID变量
float last_error = 0;
float error_sum = 0;

// 电机控制
void control_motor(float pid_output)
{
    // 将PID输出限制在[-255, 255]之间
    if (pid_output > 255) {
        pid_output = 255;
    } else if (pid_output < -255) {
        pid_output = -255;
    }
    // 设置电机PWM输出
    if (pid_output > 0) {
        P1_4 = 0;
        P1_5 = 1;
        PWM1 = pid_output;
    } else {
        P1_4 = 1;
        P1_5 = 0;
        PWM1 = -pid_output;
    }
}

// PID计算
float pid_calc(float current_speed)
{
    float error = target_speed - current_speed;
    float error_rate = (error - last_error) / T;
    error_sum += error * T;
    float pid_output = KP * error + KI * error_sum + KD * error_rate;
    last_error = error;
    return pid_output;
}

// 主函数
int main()
{
    // 初始化GPIO和PWM
    P1M1 = 0;
    P1M0 = 0xFF;
    PWM_CR = 0x82;
    PWM_CMR = 0x02;
    // 循环计算PID并控制电机
    while (1) {
        // 读取电机速度
        motor_speed = read_motor_speed();
        // 计算PID输出
        float pid_output = pid_calc(motor_speed);
        // 控制电机
        control_motor(pid_output);
        // 延时一段时间,等待下一次采样
        delay_ms(10);
    }
    return 0;
}
```

上述代码中,`KP`、`KI`和`KD`是PID的三个参数,`T`是采样周期,`target_speed`是目标速度,`motor_speed`是电机实际速度,`last_error`和`error_sum`是PID计算中需要保存的变量。

在`pid_calc()`函数中,先计算误差值和误差变化率,累计误差值,再根据PID公式计算出PID输出。在`control_motor()`函数中,将PID输出限制在[-255, 255]之间,并根据输出控制电机的PWM输出。

在主函数中,循环读取电机速度,计算PID输出,并控制电机。需要注意的是,在实际应用中,还需要根据具体的电机和采样周期调整PID参数,以获得更好的控制效果。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值