Compiling DS-SLAM's Caffe (caffe-segnet-cudnn7) with RTX 3070 + CUDA 11.4 + cuDNN 8.2

First error

CMake's cuDNN detection reports an unknown version:

Found cuDNN: ver. ??? found (include: /usr/local/cuda-11.4/include, library: /usr/local/cuda-11.4/lib64/libcudnn.so)

Fix

cuDNN 8 moved the version macros (CUDNN_MAJOR, CUDNN_MINOR, CUDNN_PATCHLEVEL) out of cudnn.h into the new cudnn_version.h, so Caffe's detection script finds nothing to parse. Open cmake/Cuda.cmake, locate the file(READ ...) line that loads cudnn.h, and point it at cudnn_version.h instead:

file(READ ${CUDNN_INCLUDE}/cudnn_version.h CUDNN_VERSION_FILE_CONTENTS)
#file(READ ${CUDNN_INCLUDE}/cudnn.h CUDNN_VERSION_FILE_CONTENTS)
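
To confirm the macros really are where the patched Cuda.cmake now looks, this small standalone program (an illustrative addition, not part of the original fix) prints the header version declared by cudnn_version.h alongside the version of the libcudnn.so it links against; if the two disagree, the include path and the library path point at different cuDNN installs. The build command assumes the CUDA 11.4 paths used throughout this post.

// check_cudnn_version.cpp -- hedged sanity check, not part of the Caffe tree.
// Build: g++ check_cudnn_version.cpp -I/usr/local/cuda-11.4/include \
//            -L/usr/local/cuda-11.4/lib64 -lcudnn -o check_cudnn_version
#include <cstdio>
#include <cudnn.h>  // in cuDNN 8 this pulls in cudnn_version.h

int main() {
  // Compile-time version from the headers that CMake parses.
  std::printf("header : %d.%d.%d\n", CUDNN_MAJOR, CUDNN_MINOR, CUDNN_PATCHLEVEL);
  // Runtime version of the loaded library (major*1000 + minor*100 + patchlevel).
  std::printf("library: %zu\n", cudnnGetVersion());
  return 0;
}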

Second error

The build stops with a "No such file or directory" error on one of the cuDNN sub-headers. In cuDNN 8, cudnn.h is only an umbrella header that includes cudnn_ops_infer.h, cudnn_cnn_train.h, and the other headers listed below, and these were never copied into the CUDA include directory.

Fix

From the folder where the cuDNN 8.2 archive was extracted (here ~/sofeware/cudnn-11.4/cuda/include), copy the missing headers into the CUDA include directory:

sudo cp cudnn_ops_infer.h /usr/local/cuda-11.4/include
sudo cp cudnn_ops_train.h /usr/local/cuda-11.4/include
sudo cp cudnn_adv_infer.h /usr/local/cuda-11.4/include
sudo cp cudnn_adv_train.h /usr/local/cuda-11.4/include
sudo cp cudnn_cnn_infer.h /usr/local/cuda-11.4/include
sudo cp cudnn_cnn_train.h /usr/local/cuda-11.4/include
sudo cp cudnn_backend.h /usr/local/cuda-11.4/include
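
With the headers in place, the following sketch (again an illustrative addition, assuming the same CUDA 11.4 paths) gives a quick pass/fail before rebuilding Caffe: it fails to compile if a sub-header is still missing, and fails at runtime if libcudnn and the driver setup disagree.

// check_cudnn_handle.cpp -- illustrative check, not part of DS-SLAM or Caffe.
// Build: g++ check_cudnn_handle.cpp -I/usr/local/cuda-11.4/include \
//            -L/usr/local/cuda-11.4/lib64 -lcudnn -o check_cudnn_handle
#include <cstdio>
#include <cudnn.h>  // compilation fails here if cudnn_ops_infer.h etc. are absent

int main() {
  cudnnHandle_t handle;
  cudnnStatus_t status = cudnnCreate(&handle);  // initializes cuDNN on the GPU
  std::printf("cudnnCreate: %s\n", cudnnGetErrorString(status));
  if (status == CUDNN_STATUS_SUCCESS) {
    cudnnDestroy(handle);
  }
  return status == CUDNN_STATUS_SUCCESS ? 0 : 1;
}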

Third fix: patch cudnn_conv_layer.cpp

cuDNN 8 removed cudnnGetConvolutionForwardAlgorithm(), cudnnGetConvolutionBackwardFilterAlgorithm(), and cudnnGetConvolutionBackwardDataAlgorithm(), which Caffe's cuDNN convolution layer calls when choosing algorithms. In src/caffe/layers/cudnn_conv_layer.cpp, replace the Reshape() implementation with the version below; it switches to the _v7 query API and picks the first returned algorithm that succeeded and fits in free GPU memory:

template <typename Dtype>
void CuDNNConvolutionLayer<Dtype>::Reshape(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  ConvolutionLayer<Dtype>::Reshape(bottom, top);
  CHECK_LE(2, this->num_spatial_axes_)
      << "CuDNNConvolution input must have 2 spatial axes "
      << "(e.g., height and width). "
      << "Use 'engine: CAFFE' for general ND convolution.";
  bottom_offset_ = this->bottom_dim_ / this->group_;
  top_offset_ = this->top_dim_ / this->group_;
  const int height = bottom[0]->shape(this->channel_axis_ + 1);
  const int width = bottom[0]->shape(this->channel_axis_ + 2);
  const int height_out = top[0]->shape(this->channel_axis_ + 1);
  const int width_out = top[0]->shape(this->channel_axis_ + 2);

  const int* pad_data = this->pad_.cpu_data();
  const int pad_h = pad_data[0];
  const int pad_w = pad_data[1];
  const int* stride_data = this->stride_.cpu_data();
  const int stride_h = stride_data[0];
  const int stride_w = stride_data[1];
  // cuDNN 8 removed the cudnnGet*Algorithm() calls used in the #else branch
  // below, so the v7 query path is forced on; with Caffe's version macro this
  // guard would read: #if CUDNN_VERSION_MIN(8, 0, 0)
  #if 1
  int RetCnt;
  bool found_conv_algorithm;
  size_t free_memory, total_memory;
  cudnnConvolutionFwdAlgoPerf_t     fwd_algo_pref_[4];
  cudnnConvolutionBwdDataAlgoPerf_t bwd_data_algo_pref_[4];
 
  //get memory sizes
  cudaMemGetInfo(&free_memory, &total_memory);
  #else
  // Specify workspace limit for kernels directly until we have a
  // planning strategy and a rewrite of Caffe's GPU memory management
  size_t workspace_limit_bytes = 8*1024*1024;
  #endif
  for (int i = 0; i < bottom.size(); i++) {
    cudnn::setTensor4dDesc<Dtype>(&bottom_descs_[i],
        this->num_,
        this->channels_ / this->group_, height, width,
        this->channels_ * height * width,
        height * width, width, 1);
    cudnn::setTensor4dDesc<Dtype>(&top_descs_[i],
        this->num_,
        this->num_output_ / this->group_, height_out, width_out,
        this->num_output_ * this->out_spatial_dim_,
        this->out_spatial_dim_, width_out, 1);
    cudnn::setConvolutionDesc<Dtype>(&conv_descs_[i], bottom_descs_[i],
        filter_desc_, pad_h, pad_w,
        stride_h, stride_w);
    // Same guard as above: force the cuDNN 8 (v7 query) path.
    #if 1
    // choose forward algorithm for this convolution
    // (CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED is skipped in the loop
    // below because it is not implemented in cuDNN 8)
    CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm_v7(handle_[0],
      bottom_descs_[i],
      filter_desc_,
      conv_descs_[i],
      top_descs_[i],
      4,
      &RetCnt,
      fwd_algo_pref_));
 
    found_conv_algorithm = false;
    for(int n=0;n<RetCnt;n++){
      if (fwd_algo_pref_[n].status == CUDNN_STATUS_SUCCESS &&
          fwd_algo_pref_[n].algo != CUDNN_CONVOLUTION_FWD_ALGO_WINOGRAD_NONFUSED &&
          fwd_algo_pref_[n].memory < free_memory){
        found_conv_algorithm = true;
        fwd_algo_[i]                   = fwd_algo_pref_[n].algo;
        workspace_fwd_sizes_[i]        = fwd_algo_pref_[n].memory;
        break;
      }
    }
    if(!found_conv_algorithm) LOG(ERROR) << "cuDNN did not return a suitable algorithm for convolution.";
    else{
        // choose backward algorithm for filter
        // for better or worse, just a fixed constant due to the missing
        // cudnnGetConvolutionBackwardFilterAlgorithm in cuDNN version 8.0
        bwd_filter_algo_[i] = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;
        // twice the amount of the forward search, to be safe
        workspace_bwd_filter_sizes_[i] = 2*workspace_fwd_sizes_[i];
    }
 
    // choose backward algo for data
    CUDNN_CHECK(cudnnGetConvolutionBackwardDataAlgorithm_v7(handle_[0],
      filter_desc_,
      top_descs_[i],
      conv_descs_[i],
      bottom_descs_[i],
      4,
      &RetCnt,
      bwd_data_algo_pref_));
 
    found_conv_algorithm = false;
    for(int n=0;n<RetCnt;n++){
      if (bwd_data_algo_pref_[n].status == CUDNN_STATUS_SUCCESS &&
          bwd_data_algo_pref_[n].algo != CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD &&
          bwd_data_algo_pref_[n].algo != CUDNN_CONVOLUTION_BWD_DATA_ALGO_WINOGRAD_NONFUSED &&
          bwd_data_algo_pref_[n].memory < free_memory){
        found_conv_algorithm = true;
        bwd_data_algo_[i]              = bwd_data_algo_pref_[n].algo;
        workspace_bwd_data_sizes_[i]   = bwd_data_algo_pref_[n].memory;
        break;
      }
    }
    if(!found_conv_algorithm) LOG(ERROR) << "cuDNN did not return a suitable algorithm for convolution.";
    #else
    // choose forward and backward algorithms + workspace(s)
    CUDNN_CHECK(cudnnGetConvolutionForwardAlgorithm(handle_[0],
      bottom_descs_[i],
      filter_desc_,
      conv_descs_[i],
      top_descs_[i],
      CUDNN_CONVOLUTION_FWD_SPECIFY_WORKSPACE_LIMIT,
      workspace_limit_bytes,
      &fwd_algo_[i]));
 
    CUDNN_CHECK(cudnnGetConvolutionForwardWorkspaceSize(handle_[0],
      bottom_descs_[i],
      filter_desc_,
      conv_descs_[i],
      top_descs_[i],
      fwd_algo_[i],
      &(workspace_fwd_sizes_[i])));
 
    // choose backward algorithm for filter
    CUDNN_CHECK(cudnnGetConvolutionBackwardFilterAlgorithm(handle_[0],
          bottom_descs_[i], top_descs_[i], conv_descs_[i], filter_desc_,
          CUDNN_CONVOLUTION_BWD_FILTER_SPECIFY_WORKSPACE_LIMIT,
          workspace_limit_bytes, &bwd_filter_algo_[i]) );
 
    // get workspace for backwards filter algorithm
    CUDNN_CHECK(cudnnGetConvolutionBackwardFilterWorkspaceSize(handle_[0],
          bottom_descs_[i], top_descs_[i], conv_descs_[i], filter_desc_,
          bwd_filter_algo_[i], &workspace_bwd_filter_sizes_[i]));
 
    // choose backward algo for data
    CUDNN_CHECK(cudnnGetConvolutionBackwardDataAlgorithm(handle_[0],
          filter_desc_, top_descs_[i], conv_descs_[i], bottom_descs_[i],
          CUDNN_CONVOLUTION_BWD_DATA_SPECIFY_WORKSPACE_LIMIT,
        workspace_limit_bytes, &bwd_data_algo_[i]));
 
    // get workspace size
    CUDNN_CHECK(cudnnGetConvolutionBackwardDataWorkspaceSize(handle_[0],
          filter_desc_, top_descs_[i], conv_descs_[i], bottom_descs_[i],
          bwd_data_algo_[i], &workspace_bwd_data_sizes_[i]) );
    #endif
  }
 
  // reduce over all workspace sizes to get a maximum to allocate / reallocate
  size_t total_workspace_fwd = 0;
  size_t total_workspace_bwd_data = 0;
  size_t total_workspace_bwd_filter = 0;
 
  for (size_t i = 0; i < bottom.size(); i++) {
    total_workspace_fwd        = std::max(total_workspace_fwd,
                                     workspace_fwd_sizes_[i]);
    total_workspace_bwd_data   = std::max(total_workspace_bwd_data,
                                     workspace_bwd_data_sizes_[i]);
    total_workspace_bwd_filter = std::max(total_workspace_bwd_filter,
                                     workspace_bwd_filter_sizes_[i]);
  }
  // get max over all operations
  size_t max_workspace = std::max(total_workspace_fwd,
                             total_workspace_bwd_data);
  max_workspace = std::max(max_workspace, total_workspace_bwd_filter);
  // ensure all groups have enough workspace
  size_t total_max_workspace = max_workspace *
                               (this->group_ * CUDNN_STREAMS_PER_GROUP);
 
  // this is the total amount of storage needed over all groups + streams
  if (total_max_workspace > workspaceSizeInBytes) {
    DLOG(INFO) << "Reallocating workspace storage: " << total_max_workspace;
    workspaceSizeInBytes = total_max_workspace;
 
    // free the existing workspace and allocate a new (larger) one
    cudaFree(this->workspaceData);
 
    cudaError_t err = cudaMalloc(&(this->workspaceData), workspaceSizeInBytes);
    if (err != cudaSuccess) {
      // force zero memory path
      for (int i = 0; i < bottom.size(); i++) {
        workspace_fwd_sizes_[i] = 0;
        workspace_bwd_filter_sizes_[i] = 0;
        workspace_bwd_data_sizes_[i] = 0;
        fwd_algo_[i] = CUDNN_CONVOLUTION_FWD_ALGO_IMPLICIT_GEMM;
        bwd_filter_algo_[i] = CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0;
        bwd_data_algo_[i] = CUDNN_CONVOLUTION_BWD_DATA_ALGO_0;
      }
 
      // NULL out all workspace pointers
      for (int g = 0; g < (this->group_ * CUDNN_STREAMS_PER_GROUP); g++) {
        workspace[g] = NULL;
      }
      // NULL out underlying data
      workspaceData = NULL;
      workspaceSizeInBytes = 0;
    }
 
    // if we succeed in the allocation, set pointer aliases for workspaces
    for (int g = 0; g < (this->group_ * CUDNN_STREAMS_PER_GROUP); g++) {
      workspace[g] = reinterpret_cast<char *>(workspaceData) + g*max_workspace;
    }
  }
 
  // Tensor descriptor for bias.
  if (this->bias_term_) {
    cudnn::setTensor4dDesc<Dtype>(&bias_desc_,
        1, this->num_output_ / this->group_, 1, 1);
  }
}
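
Two closing notes on the patch. First, the #if 1 guards hard-enable the cuDNN 8 path. If you want the file to stay buildable against cuDNN 7 as well, restore the version guard instead; upstream Caffe defines the macro in include/caffe/util/cudnn.hpp as follows (quoted from upstream Caffe, so verify it against your checkout):

// From include/caffe/util/cudnn.hpp: true when the cuDNN headers are at least
// the requested version, letting the "#if 1" above become
// "#if CUDNN_VERSION_MIN(8, 0, 0)" wherever this header is in scope.
#define CUDNN_VERSION_MIN(major, minor, patch) \
    (CUDNN_VERSION >= (major * 1000 + minor * 100 + patch))

Second, the selection logic is deliberately conservative: the _v7 queries return up to four candidates ranked by expected performance, and the loops take the first one that reported success, is not a Winograd variant, and fits in the free GPU memory reported by cudaMemGetInfo(), while the backward-filter algorithm is simply pinned to CUDNN_CONVOLUTION_BWD_FILTER_ALGO_0 with a doubled workspace estimate. After saving the file, re-run the Caffe build.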
