Running Apollo on Orin (arm64/aarch64): Perception build notes

Good news: Apollo has now released an Orin build: https://github.com/ApolloAuto/apollo/issues/15090

There is also support for the RTX 4090:

https://github.com/ApolloAuto/apollo/issues/14821

I recently started building the perception module on Orin; these notes record the process.

Build notes for the other modules are in an earlier post: running Apollo on orin(arm64/aarch64) 移植记录 (Scott_D_'s CSDN blog).

Orin is arm64 and ships with CUDA >= 11.4, TensorRT 8, and cuDNN 8.2.4, but parts of Apollo's perception code are written against the TensorRT 7 API, so you have to hand-write wrappers that adapt the TensorRT 8 interfaces back to the TensorRT 7 ones.

In principle, the approach in this post also helps if you want to run Apollo on an RTX 4090: the 4090 requires CUDA 12.x, which likewise pairs with TensorRT 8, so the same interface rewrites are needed.

I. Environment setup

Inside the Apollo container, install CUDA 11.4 and cuDNN 8.2.4 (the arm64 builds):

cuDNN Archive | NVIDIA Developer

From the archive, pick cuDNN Runtime Library for Ubuntu20.04 aarch64sbsa (Deb) and cuDNN Developer Library for Ubuntu20.04 aarch64sbsa (Deb).

Copy the two .deb packages into the Docker container and install them with dpkg -i cuDNN-xxxx, runtime package first, then the developer package.

Then install TensorRT 8; tutorials for this are easy to find.
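Before kicking off the build, it can help to confirm which versions the compiler actually sees. A minimal sanity-check sketch of my own (not part of the Apollo tree; adjust the include paths to wherever the .deb packages installed the headers):

// version_check.cc -- print the cuDNN/TensorRT versions visible to the
// toolchain. On Orin, expect cuDNN 8.2.4 and TensorRT 8.x.
// Build (paths may differ): g++ version_check.cc -o version_check
#include <cstdio>

#include <NvInferVersion.h>  // NV_TENSORRT_MAJOR / MINOR / PATCH
#include <cudnn_version.h>   // CUDNN_MAJOR / MINOR / PATCHLEVEL

int main() {
  std::printf("cuDNN    %d.%d.%d\n", CUDNN_MAJOR, CUDNN_MINOR,
              CUDNN_PATCHLEVEL);
  std::printf("TensorRT %d.%d.%d\n", NV_TENSORRT_MAJOR, NV_TENSORRT_MINOR,
              NV_TENSORRT_PATCH);
  return 0;
}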

II. Build process

bash apollo.sh build_opt_gpu

1. Error: unrecognized option "-msse4.1"

Comment out line 10 of /apollo/modules/perception/lidar/lib/ground_detector/spatio_temporal_ground_detector/BUILD:

copts = ["-msse4.1"],

The arm architecture does not support this x86-only compile flag. If you would rather keep SSE4.1 for x86 builds, see the select() sketch below.
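A hedged alternative (my sketch, assuming the standard @platforms constraints are visible in Apollo's Bazel workspace) is to make the flag conditional instead of deleting it:

copts = select({
    "@platforms//cpu:x86_64": ["-msse4.1"],  # SSE4.1 only on x86_64
    "//conditions:default": [],              # nothing on arm64/aarch64
}),

Simply commenting the line out, as above, works just as well for an Orin-only build.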

2. Error: undeclared type "__m128i"

__m128i is a data type from the SSE2 intrinsics, which arm does not provide. Add the header sse2neon.h under modules/perception/common/i_lib/core, include it from i_basic.h, and list sse2neon.h in the BUILD file.

Download it from https://codeload.github.com/DLTcollab/sse2neon/tar.gz/refs/tags/v1.5.0
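A minimal sketch of the guarded include in i_basic.h (assuming the header was saved next to it; the exact guard macro is up to you):

// modules/perception/common/i_lib/core/i_basic.h
#if defined(__aarch64__) || defined(__arm__)
#include "modules/perception/common/i_lib/core/sse2neon.h"  // __m128i etc. implemented with NEON
#else
#include <emmintrin.h>  // native SSE2 intrinsics on x86
#endif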

3. Error: 'DimsNCHW' in namespace 'nvinfer1' does not name a type

This requires adapting the TensorRT interface; the code below comes from Can I upgrade the CUDA and TensorRT version inside the apollo docker? · Issue #14858 · ApolloAuto/apollo · GitHub

First, create a new file arm_wrappers.h under modules/perception/inference/tensorrt/:

#pragma once

#include <NvInferLegacyDims.h>

namespace nvinfer1 {

class DimsNCHW : public Dims4 {
 public:
    DimsNCHW() : Dims4() {}
    DimsNCHW(
      int32_t batch_size, int32_t channels,
      int32_t height, int32_t width)
        : Dims4(batch_size, channels, height, width) {}

    int32_t& n() {
      return d[0];
    }

    int32_t n() const {
      return d[0];
    }

    int32_t& c() {
      return d[1];
    }

    int32_t c() const {
      return d[1];
    }

    int32_t& h() {
      return d[2];
    }

    int32_t h() const {
      return d[2];
    }

    int32_t& w() {
      return d[3];
    }

    int32_t w() const {
      return d[3];
    }
};

class DimsCHW : public Dims3 {
 public:
    DimsCHW() : Dims3() {}
    DimsCHW(int32_t channels, int32_t height, int32_t width)
      : Dims3(channels, height, width) {}

    int32_t& c() {
      return d[0];
    }

    int32_t c() const {
      return d[0];
    }

    int32_t& h() {
      return d[1];
    }

    int32_t h() const {
      return d[1];
    }

    int32_t& w() {
      return d[2];
    }

    int32_t w() const {
      return d[2];
    }
};

}  // namespace nvinfer1

Include this file from modules/perception/inference/tensorrt/batch_stream.h and modules/perception/inference/tensorrt/rt_common.h, and add it to the hdrs attribute of their targets in the BUILD file.
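A one-line sketch of the include (place it with the other includes, before any use of the NCHW/CHW types):

// batch_stream.h and rt_common.h
#include "modules/perception/inference/tensorrt/arm_wrappers.h"  // DimsNCHW / DimsCHW for TensorRT 8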

Then modify modules/perception/inference/tensorrt/plugins/softmax_plugin.h, replacing its contents with:

/******************************************************************************
 * Copyright 2018 The Apollo Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *****************************************************************************/

#pragma once

#include "modules/perception/inference/tensorrt/rt_common.h"

namespace apollo {
namespace perception {
namespace inference {

#ifndef TENSORRT_8
class SoftmaxPlugin : public nvinfer1::IPlugin {
 public:
  SoftmaxPlugin(const SoftmaxParameter &param, nvinfer1::Dims in_dims) {
    input_dims_.nbDims = in_dims.nbDims;
    for (int i = 0; i < in_dims.nbDims; i++) {
      input_dims_.d[i] = in_dims.d[i];
      input_dims_.type[i] = in_dims.type[i];
    }
    axis_ = param.axis() - 1;
    CHECK_GE(axis_, 0);
    CHECK_LE(axis_ + 1, input_dims_.nbDims);

    inner_num_ = 1;
    for (int i = axis_ + 1; i < input_dims_.nbDims; i++) {
      inner_num_ *= input_dims_.d[i];
    }
    outer_num_ = 1;
    for (int i = 0; i < axis_; i++) {
      outer_num_ *= input_dims_.d[i];
    }
    cudnnCreateTensorDescriptor(&input_desc_);
    cudnnCreateTensorDescriptor(&output_desc_);
  }

  SoftmaxPlugin() {}

  ~SoftmaxPlugin() {
    cudnnDestroyTensorDescriptor(input_desc_);
    cudnnDestroyTensorDescriptor(output_desc_);
  }
  virtual int initialize() {
    cudnnCreate(&cudnn_);  // initialize cudnn and cublas
    cublasCreate(&cublas_);
    return 0;
  }
  virtual void terminate() {
    cublasDestroy(cublas_);
    cudnnDestroy(cudnn_);
  }
  int getNbOutputs() const override { return 1; }

  nvinfer1::Dims getOutputDimensions(int index, const nvinfer1::Dims *inputs,
                                     int nbInputDims) override {
    nvinfer1::Dims out_dims = inputs[0];
    return out_dims;
  }

  void configure(const nvinfer1::Dims *inputDims, int nbInputs,
                 const nvinfer1::Dims *outputDims, int nbOutputs,
                 int maxBatchSize) override {
    input_dims_ = inputDims[0];
  }

  size_t getWorkspaceSize(int maxBatchSize) const override { return 0; }

  int enqueue(int batchSize, const void *const *inputs, void **outputs,
              void *workspace, cudaStream_t stream) override;

  size_t getSerializationSize() override { return 0; }

  void serialize(void *buffer) override {
    char *d = reinterpret_cast<char *>(buffer), *a = d;
    size_t size = getSerializationSize();
    CHECK_EQ(d, a + size);
  }

 private:
  cudnnHandle_t cudnn_;
  cublasHandle_t cublas_;
  nvinfer1::Dims input_dims_;
  int axis_;
  int inner_num_;
  int outer_num_;
  cudnnTensorDescriptor_t input_desc_;
  cudnnTensorDescriptor_t output_desc_;
};

#else
class SoftmaxPlugin : public nvinfer1::IPluginV2Ext {
 public:
  SoftmaxPlugin(const SoftmaxParameter &param, nvinfer1::Dims in_dims) {
    input_dims_.nbDims = in_dims.nbDims;
    for (int i = 0; i < in_dims.nbDims; i++) {
      input_dims_.d[i] = in_dims.d[i];
    }
    axis_ = param.axis() - 1;
    CHECK_GE(axis_, 0);
    CHECK_LE(axis_ + 1, input_dims_.nbDims);

    inner_num_ = 1;
    for (int i = axis_ + 1; i < input_dims_.nbDims; i++) {
      inner_num_ *= input_dims_.d[i];
    }
    outer_num_ = 1;
    for (int i = 0; i < axis_; i++) {
      outer_num_ *= input_dims_.d[i];
    }
    cudnnCreateTensorDescriptor(&input_desc_);
    cudnnCreateTensorDescriptor(&output_desc_);
  }

  SoftmaxPlugin() {}

  ~SoftmaxPlugin() {
    cudnnDestroyTensorDescriptor(input_desc_);
    cudnnDestroyTensorDescriptor(output_desc_);
  }
  int32_t initialize() noexcept override {
    cudnnCreate(&cudnn_);  // initialize cudnn and cublas
    cublasCreate(&cublas_);
    return 0;
  }
  void terminate() noexcept override {
    cublasDestroy(cublas_);
    cudnnDestroy(cudnn_);
  }
  int32_t getNbOutputs() const noexcept override { return 1; }

  nvinfer1::Dims getOutputDimensions(int32_t index,
      const nvinfer1::Dims *inputs, int32_t nbInputDims)
      noexcept override {
    nvinfer1::Dims out_dims = inputs[0];
    return out_dims;
  }

  void configureWithFormat(const nvinfer1::Dims *inputDims, int32_t nbInputs,
                 const nvinfer1::Dims *outputDims, int32_t nbOutputs,
                 nvinfer1::DataType type, nvinfer1::PluginFormat format,
                 int32_t maxBatchSize) noexcept override {
    input_dims_ = inputDims[0];
  }

  size_t getWorkspaceSize(int32_t maxBatchSize)
      const noexcept override { return 0; }

  int32_t enqueue(int32_t batchSize, const void *const *inputs,
              void *const *outputs, void *workspace, cudaStream_t stream)
              noexcept override;

  size_t getSerializationSize() const noexcept override { return 0; }

  void serialize(void *buffer) const noexcept override {
    char *d = reinterpret_cast<char *>(buffer), *a = d;
    size_t size = getSerializationSize();
    CHECK_EQ(d, a + size);
  }

  nvinfer1::AsciiChar const* getPluginType()
      const noexcept override {
    return plugin_type;
  }

  nvinfer1::AsciiChar const* getPluginVersion()
      const noexcept override {
    return plugin_version;
  }

  void setPluginNamespace(const nvinfer1::AsciiChar* libNamespace)
      noexcept override {
    plugin_namespace = const_cast<nvinfer1::AsciiChar*>(libNamespace);
  }

  nvinfer1::AsciiChar const* getPluginNamespace()
      const noexcept override {
    return const_cast<nvinfer1::AsciiChar*>(plugin_namespace);
  }

  bool supportsFormat(nvinfer1::DataType type,
      nvinfer1::PluginFormat format) const noexcept override {
    return true;
  }

  void destroy() noexcept override {
    delete this;
  }

  nvinfer1::IPluginV2Ext* clone() const noexcept override {
    SoftmaxPlugin* p = new SoftmaxPlugin();
    cudnnCreate(&(p->cudnn_));  // initialize cudnn and cublas
    cublasCreate(&(p->cublas_));
    p->axis_ = axis_;
    p->inner_num_ = inner_num_;
    p->outer_num_ = outer_num_;
    p->plugin_namespace = plugin_namespace;
    (p->input_dims_).nbDims = input_dims_.nbDims;
    for (int i = 0; i < input_dims_.nbDims; i++) {
      (p->input_dims_).d[i] = input_dims_.d[i];
    }
    cudnnCreateTensorDescriptor(&(p->input_desc_));
    cudnnCreateTensorDescriptor(&(p->output_desc_));
    return p;
  }

  bool isOutputBroadcastAcrossBatch(int32_t outputIndex,
      bool const *inputIsBroadcasted, int32_t nbInputs)
      const noexcept override {
    return false;
  }

  bool canBroadcastInputAcrossBatch(int32_t inputIndex)
      const noexcept override {
    return false;
  }

  nvinfer1::DataType getOutputDataType(int32_t index,
      nvinfer1::DataType const *inputTypes, int32_t nbInputs)
      const noexcept override {
    return nvinfer1::DataType::kFLOAT;
  }

  void configurePlugin(
    nvinfer1::Dims const *inputDims, int32_t nbInputs,
    nvinfer1::Dims const *outputDims, int32_t nbOutputs,
    nvinfer1::DataType const *inputTypes,
    nvinfer1::DataType const *outputTypes,
    bool const *inputIsBroadcast, bool const *outputIsBroadcast,
    nvinfer1::PluginFormat floatFormat, int32_t maxBatchSize)
    noexcept override {}

 private:
  cudnnHandle_t cudnn_;
  cublasHandle_t cublas_;
  nvinfer1::Dims input_dims_;
  int axis_;
  int inner_num_;
  int outer_num_;
  // Initialized so getPluginNamespace() never returns an uninitialized pointer.
  nvinfer1::AsciiChar* plugin_namespace = const_cast<nvinfer1::AsciiChar*>("");
  const nvinfer1::AsciiChar* plugin_type = "";
  const nvinfer1::AsciiChar* plugin_version = "";
  cudnnTensorDescriptor_t input_desc_;
  cudnnTensorDescriptor_t output_desc_;
};

#endif

}  // namespace inference
}  // namespace perception
}  // namespace apollo

The main changes are:

  1. Every function from initialize() onward must be declared noexcept.
  2. serialize and getSerializationSize must additionally be declared const.
  3. The outputs parameter of enqueue must be declared void *const *; make the same change to the matching enqueue definition in the corresponding .cu file (see the sketch after this list).
  4. Add read/write accessors for the new private members (plugin type, version, and namespace).
  5. Add the destroy and clone functions.

Apply the same treatment to the other plugin headers.
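For item 3, the change to the .cu file is only the signature; a sketch (the body stays whatever the plugin already does, e.g. its cuDNN softmax call):

// TensorRT 7 definition was:
//   int SoftmaxPlugin::enqueue(int batchSize, const void *const *inputs,
//                              void **outputs, void *workspace,
//                              cudaStream_t stream) { ... }
// TensorRT 8: outputs becomes `void *const *` and the function must be
// noexcept, otherwise the override in the header never binds.
int32_t SoftmaxPlugin::enqueue(int32_t batchSize, const void *const *inputs,
                               void *const *outputs, void *workspace,
                               cudaStream_t stream) noexcept {
  // ... existing cuDNN/CUDA softmax implementation, unchanged ...
  return 0;
}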

Bad news, unfortunately: some of the models and libraries perception depends on ship from Apollo as prebuilt x86 binaries, so perception cannot be fully built on Orin for now. If you have your own perception sensor models (for the lidar component and the like), you can swap them in for Apollo's libraries and complete the perception build.
