CUDA实现L2欧式距离

最新推荐文章于 2024-05-14 10:24:23 发布

武乐乐~

最新推荐文章于 2024-05-14 10:24:23 发布

阅读量450

点赞数

分类专栏： CUDA编程文章标签： cuda

本文链接：https://blog.csdn.net/wulele2/article/details/119043340

版权

CUDA编程专栏收录该内容

5 篇文章 0 订阅

订阅专栏

文章目录

前言

前言

本教程用 CUDA 实现向量 A[5] 与矩阵 B[3][5] 的每一行之间的欧氏距离计算（代码中只累加差值的平方和，未开平方，即平方欧氏距离），每个线程负责 B 的一行。

#include <stdio.h>
#include <stdlib.h>

// Problem dimensions shared by the kernel and the host code.
#define N 5             // number of elements in the query vector / in each row
#define D 3             // number of rows compared against the query vector
// Total element count of the D x N matrix. Parenthesized so the macro expands
// as a single value inside larger expressions (e.g. `x / SIZE`, `x % SIZE`).
#define SIZE (N * D)


void __global__ cpt(int *da, int *db, int *dres);

// Computes the squared Euclidean (L2) distance between the vector `da` and
// each row of the matrix `db`; no square root is taken.
//
//   da   : device pointer to N ints (the query vector).
//   db   : device pointer to D * N ints, indexed row-major as db[row * N + i].
//   dres : device pointer to D ints; dres[row] receives
//          sum over i of (da[i] - db[row * N + i])^2.
//
// Launch layout: one thread per row, 1-D grid and 1-D blocks. Any launch with
// at least D threads in total is valid; surplus threads return immediately.
// N and D are the file-level macros.
void __global__ cpt(int *da, int *db, int *dres)
{
    // Flat global thread index selects the row of db handled by this thread.
    int row = blockIdx.x * blockDim.x + threadIdx.x;
    if (row >= D)
    {
        return;   // guard: never read or write past the D available rows
    }

    int sum = 0;   // per-thread accumulator
    for (int i = 0; i < N; ++i)
    {
        // Both factors must be the same element-wise difference; the index
        // offset `i` belongs inside the subscript, not added to the value.
        int diff = da[i] - db[row * N + i];
        sum += diff * diff;
    }
    dres[row] = sum;
}

// Aborts the program with a diagnostic if a CUDA runtime call did not succeed.
// `what` names the failing operation in the error message.
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Host driver: builds the inputs (vector of ones, matrix of zeros), runs the
// `cpt` kernel with one thread per matrix row, copies the D results back and
// prints the first one. Returns 0 on success; exits with a failure status if
// any allocation or CUDA call fails.
int main(int arc, char *argv[])
{
    (void)arc;    // command-line arguments are not used
    (void)argv;

    // host memory and assignment
    int *ha   = (int *)malloc(sizeof(int) * N);
    int *hb   = (int *)malloc(sizeof(int) * SIZE);
    int *hres = (int *)malloc(sizeof(int) * D);
    if (ha == NULL || hb == NULL || hres == NULL)
    {
        fprintf(stderr, "host allocation failed\n");
        free(ha);     // free(NULL) is a no-op, so this is safe for any subset
        free(hb);
        free(hres);
        return EXIT_FAILURE;
    }

    for (int i = 0; i < N; ++i)
    {
        ha[i] = 1;
    }

    for (int i = 0; i < SIZE; ++i)
    {
        hb[i] = 0;
    }

    for (int i = 0; i < D; ++i)
    {
        hres[i] = 0;
    }

    // device memory and copy
    int *da = NULL, *db = NULL, *dres = NULL;
    checkCuda(cudaMalloc((void **)&da, sizeof(int) * N), "cudaMalloc(da)");
    checkCuda(cudaMalloc((void **)&db, sizeof(int) * SIZE), "cudaMalloc(db)");
    checkCuda(cudaMalloc((void **)&dres, sizeof(int) * D), "cudaMalloc(dres)");

    checkCuda(cudaMemcpy(da, ha, sizeof(int) * N, cudaMemcpyHostToDevice),
              "cudaMemcpy(da <- ha)");
    checkCuda(cudaMemcpy(db, hb, sizeof(int) * SIZE, cudaMemcpyHostToDevice),
              "cudaMemcpy(db <- hb)");
    checkCuda(cudaMemcpy(dres, hres, sizeof(int) * D, cudaMemcpyHostToDevice),
              "cudaMemcpy(dres <- hres)");

    // set threads and launch the global kernel function:
    // a single block with D threads, i.e. one thread per row
    const dim3 grid_size(1);
    const dim3 block_size(D);

    cpt<<<grid_size, block_size>>>(da, db, dres);
    // A kernel launch returns nothing; launch-configuration errors are only
    // visible through cudaGetLastError().
    checkCuda(cudaGetLastError(), "cpt kernel launch");

    // copy device to host (blocking copy; also reports kernel execution errors)
    checkCuda(cudaMemcpy(hres, dres, sizeof(int) * D, cudaMemcpyDeviceToHost),
              "cudaMemcpy(hres <- dres)");

    printf("%d\n",hres[0]);

    // free memory
    free(ha);
    free(hb);
    free(hres);
    checkCuda(cudaFree(da), "cudaFree(da)");
    checkCuda(cudaFree(db), "cudaFree(db)");
    checkCuda(cudaFree(dres), "cudaFree(dres)");

    return 0;
}

武乐乐~

关注

0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
CUDA实现L2欧式距离

文章目录前言前言本教程实现一个A[5] 和 B[3][5]两个矩阵之间欧氏距离的CUDA代码。#include <stdio.h>#define N 5#define D 3 #define SIZE N*Dvoid __global__ cpt(int *da, int *db, int *dres);void __global__ cpt(int *da, int *db, int *dres){ int tid = threadIdx.x; /
复制链接

扫一扫