CUDA编程--邻近点查询

最新推荐文章于 2024-05-03 11:07:56 发布

MacalDan

最新推荐文章于 2024-05-03 11:07:56 发布

阅读量543

点赞数

分类专栏： CUDA学习笔记文章标签： python

本文链接：https://blog.csdn.net/qq_35781447/article/details/120597132

版权

CUDA学习笔记专栏收录该内容

4 篇文章 0 订阅

订阅专栏

写在前面的话：建议先看完本专栏的前三篇文章再看这篇

本文将介绍如何了查找点云中每个点的邻近点并统计个数

取点云文件

我给了个demo文件，在input文件夹下面，其每行为一个点，每行分别有4个数：index， x， y, z

0 0.190818 2.151146 1.516178
1 0.182662 2.208185 1.506151
2 0.212130 2.120995 1.519654
3 0.195404 2.216945 1.554745
4 0.177847 2.420602 1.464208
5 0.108176 2.408980 1.471528
6 0.285591 2.271129 1.526475
...........................
82881 2.856293 4.802210 0.886675
82882 2.893785 4.858056 0.938935

读取函数如下：

float* read_pc(string filename, float xyz[]){
    ifstream fin;
    fin.open(filename);
    if(!fin) {
        cout << filename << " file could not be opened\n";
        exit(0);
    }
    int idx;
    float x, y, z;
    while(!fin.eof()) {
        fin >> idx >> x >> y >> z;
        xyz[idx*3 + 0] =x;
        xyz[idx*3 + 1] =y;
        xyz[idx*3 + 2] =z;
    }
    return xyz;
}

主函数

主函数中我们需要设置计算邻近点的辐射距离，如下代码中所示，与所选点距离小于0.2m的即可以视为所选点的邻近点

int main() {
    string inputFileName = "/home/bruno/Documents/Clion_Project/cuda_demo/demo3_AdjacentPoint/input/pc.txt";
    const int point_num = 82883;
    const float radius = 0.2;
    float xyz[point_num*3];

    read_pc(inputFileName, xyz);

    checkNearPoints(point_num, xyz, radius);

    return 0;
}

检查临近点

首先我们要清楚我们的输入是什么：点云个数，点云坐标，以及辐射距离; 其次我们要理清楚如何去利用cuda加速计算。最简单的想法：我们有多少个点就开启多少个线程，这样一次就可以把所有的点的临近点算出来。该函数的编写思路与前面几篇文章总结的一样，这里不多说。

void checkNearPoints(const int point_num, float xyz[], const float radius){
    dim3 blocks(DIVUP(point_num, THREADS_PER_BLOCK));
    dim3 threads(THREADS_PER_BLOCK);


    int * ptsCnt_h;
    ptsCnt_h = (int *)malloc(point_num * sizeof(int));

    // define gpu variable
    int * point_num_d;
    float * xyz_d;
    float * radius_d;
    int * ptsCnt_d; //mark the number of adjacent points
    int * adj_point_d; //mark the adjacent points, 1 is true，0 is false

    //generate gpu ram
    CUDA_ERR_CHK( cudaMalloc((void **) &point_num_d, sizeof(int)));
    CUDA_ERR_CHK(cudaMalloc((void **) &xyz_d, 3*point_num*sizeof(float)));
    CUDA_ERR_CHK(cudaMalloc((void **) &radius_d, sizeof(float)));
    CUDA_ERR_CHK(cudaMalloc((void **) &ptsCnt_d, point_num*sizeof(int)));

    // copy host to device
    CUDA_ERR_CHK(cudaMemcpy(point_num_d, &point_num, sizeof(int), cudaMemcpyHostToDevice));
    CUDA_ERR_CHK(cudaMemcpy(xyz_d, xyz, 3*point_num*sizeof(float), cudaMemcpyHostToDevice));
    CUDA_ERR_CHK(cudaMemcpy(radius_d, &radius, sizeof(float), cudaMemcpyHostToDevice));

    // start device kernel
    checkNearPoints_cuda<<<blocks, threads>>>(point_num_d, xyz_d, radius_d, ptsCnt_d);

    // copy device to host
    CUDA_ERR_CHK(cudaMemcpy(ptsCnt_h, ptsCnt_d, point_num*sizeof(float), cudaMemcpyDeviceToHost));

    // release gpu ram
    CUDA_ERR_CHK(cudaFree(point_num_d));
    CUDA_ERR_CHK(cudaFree(xyz_d));
    CUDA_ERR_CHK(cudaFree(radius_d));
    CUDA_ERR_CHK(cudaFree(ptsCnt_d));

    for (int i =0; i< point_num; i++){
        printf("index: %d adjacent point number: %d \n",i, ptsCnt_h[i]);
    }

}

CUDA检查函数

由于我们直接调用CUDA的函数（cudaMencpy cudaMalloc 等），其不会直接返回准确的报错信息，所以我们需要写一个报错检索函数，这样我们可以知道出错的原因：

// https://stackoverflow.com/a/14038590
#define CUDA_ERR_CHK(code) { cuda_err_chk((code), __FILE__, __LINE__); }
inline void cuda_err_chk(cudaError_t code, const char *file, int line, bool abort = true) {
    if (code != cudaSuccess) {
        fprintf(stderr, "\tCUDA ERROR: %s %s %d\n", cudaGetErrorString(code), file, line);
        if (abort) exit(code);
    }
}

添加后我们可以看到如下报错：直接告诉我们在65行，内存溢出了。（说明你的GPU卡内存不够，可以换卡，或者对点进行下采样）

核函数

核函数利用线程ID做检索，通过for循环去遍历所选点外的每一个点。通过计算每个点与所选距离，再与辐射距离进行比较，小于辐射距离的视为邻近点。

__global__ void checkNearPoints_cuda(int *point_num_d, float *xyz_d, float *eps_d, int *ptsCnt_d){
    int th_index = blockIdx.x*blockDim.x + threadIdx.x;
    if (th_index >= *point_num_d) return ;

    ptsCnt_d[th_index] = 0;  // the number of adjacent points
    float o_x = xyz_d[th_index * 3 + 0];
    float o_y = xyz_d[th_index * 3 + 1];
    float o_z = xyz_d[th_index * 3 + 2];

    for (int k =0; k< *point_num_d; k++){
        if(th_index==k) continue;

        float k_x = xyz_d[k * 3 + 0];
        float k_y = xyz_d[k * 3 + 1];
        float k_z = xyz_d[k * 3 + 2];
        float l2 = sqrt((k_x-o_x)*(k_x-o_x)+(k_y-o_y)*(k_y-o_y)+(k_z-o_z)*(k_z-o_z));
        if (l2 <= *eps_d) {
            ptsCnt_d[th_index]= ptsCnt_d[th_index] + 1;
        }
    }
}

CmakeList

Cmakelist我这次给出了一个通用的模板，基本每个demo都可以用，还解决了无法核嵌套的问题。注意要根据自己CUDA安装路径小小更改下下面的代码。

cmake_minimum_required(VERSION 3.19)
project(AdjacentPoint)

set(CMAKE_CXX_STANDARD 14)

# packages
find_package(CUDA REQUIRED)

if(${CUDA_FOUND})
    set(CUDA_SOURCE_PROPERTY_FORMAT OBJ)
    set(CUDA_SEPARABLE_COMPILATION ON)
    include_directories(${CUDA_INCLUDE_DIRS})
    set(CUDA_PROPAGATE_HOST_FLAGS OFF)

    #    set(CUDA_VERBOSE_BUILD ON)
    SET(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-gencode arch=compute_75,code=sm_75;-std=c++14; -rdc=true;  -lcudadevrt)

    link_directories(/usr/local/cuda/lib64)
    MESSAGE(STATUS "found cuda")
else(${CUDA_FOUND})
    MESSAGE(STATUS "cuda not found!")
endif(${CUDA_FOUND})


file(GLOB CURRENT_SOURCES  *.cpp *.cu)
file(GLOB_RECURSE CURRENT_HEADERS  *.h *.hpp *.cuh)

source_group("Source" FILES ${CURRENT_SOURCES})
source_group("Include" FILES ${CURRENT_HEADERS})

CUDA_ADD_EXECUTABLE(AdjacentPoint ${CURRENT_HEADERS} ${CURRENT_SOURCES} ${CUDA_INCLUDE_DIRS})
target_link_libraries(AdjacentPoint /usr/local/cuda/lib64/libcudadevrt.a)

打印结果

结果如下图所示。如果想知道邻近点的索引，可以查看adj_point_d变量，该变量很占内存，如果不需要还是注释掉。

最后，完整代码以及其他demo可以在我的github中获取（不定期更新）：

GitHub - weiguangzhao/cuda_demo: CUDA DEMOhttps://github.com/weiguangzhao/cuda_demo

MacalDan

关注

0
点赞
踩
3

收藏

觉得还不错? 一键收藏
打赏
0
评论
CUDA编程--邻近点查询

写在前面的话：建议先看完本专栏的前三篇文章再看这篇本文将介绍如何了查找点云中每个点的邻近点并统计个数取点云文件我给了个demo文件，在input文件夹下面，其每行为一个点，每行分别有4个数：index， x， y, z0 0.190818 2.151146 1.5161781 0.182662 2.208185 1.5061512 0.212130 2.120995 1.5196543 0.195404 2.216945 1.5547454 0.177847 2.420602 1.
复制链接

扫一扫