cuda编程:怎么用2维网格2维块计算矩阵加法

目录

网格、块、线程的索引计算(二维)

  在二维网格、二维线程块的组织方式下,一个线程在整个网格中的全局二维索引为 ix = threadIdx.x + blockIdx.x * blockDim.x 和 iy = threadIdx.y + blockIdx.y * blockDim.y;对于宽度为 nx 的矩阵,该线程对应的一维线性下标为 idx = iy * nx + ix。

代码

/******************************************************************
 * Author: Da Liu
 * Date: 2024-07-25
 * File: grid2D_block2D.cu
 * Description: 组织线程模型:二维网格二维线程块计算二维矩阵加法.
 *****************************************************************/

#include <stdio.h>
#include "../cudalearn/tools/common.cuh"

/**
 * Element-wise integer matrix addition: c = a + b.
 *
 * Each thread handles at most one element. Element (ix, iy) lives at linear
 * offset iy * nx + ix, i.e. x is the fastest-varying dimension.
 *
 * Launch layout: 2D grid of 2D blocks. The grid must cover at least nx
 * threads along x and ny threads along y; surplus threads are masked out by
 * the bounds check. No shared memory is used.
 *
 * @param a   first input matrix, nx * ny elements
 * @param b   second input matrix, nx * ny elements
 * @param c   output matrix, nx * ny elements
 * @param nx  number of elements along x
 * @param ny  number of elements along y
 */
__global__ void add_matrix(int *a, int *b, int *c, const int nx, const int ny) 
{
    const int ix = threadIdx.x + blockIdx.x * blockDim.x;
    const int iy = threadIdx.y + blockIdx.y * blockDim.y;

    // The grid is rounded up to whole blocks, so threads past the matrix edge must do nothing.
    if (ix < nx && iy < ny) {
        // Widen before multiplying: iy * nx in 32-bit int overflows once the
        // matrix holds more than INT_MAX elements.
        const size_t idx = static_cast<size_t>(iy) * static_cast<size_t>(nx)
                         + static_cast<size_t>(ix);
        c[idx] = a[idx] + b[idx];
    }
}

/**
 * Demo driver: adds two nx-by-ny integer matrices on the GPU using a 2D grid
 * of 2D blocks and prints the first few results.
 *
 * Returns 0 on success and 1 if a copy or the kernel launch fails. Exits
 * with -1 on host allocation failure and 1 on device allocation failure.
 */
int main()
{
    setGPU();                                           // select the CUDA device to run on

    int nx = 16, ny = 8;                                // matrix extent along x and y
    int nxy = nx * ny;                                  // total number of matrix elements
    size_t stBytesCount = nxy * sizeof(int);            // size of one matrix in bytes

    // Host buffers for the two inputs and the result.
    int *ipHost_A = (int *)malloc(stBytesCount);
    int *ipHost_B = (int *)malloc(stBytesCount);
    int *ipHost_C = (int *)malloc(stBytesCount);
    if (ipHost_A == NULL || ipHost_B == NULL || ipHost_C == NULL)
    {
        printf("Memory allocation failed!\n");
        free(ipHost_A);                                 // free(NULL) is a no-op
        free(ipHost_B);
        free(ipHost_C);
        exit(-1);
    }
    for (int i = 0; i < nxy; i++)
    {
        ipHost_A[i] = i;                                // A holds 0 .. nxy-1
        ipHost_B[i] = i + 1;                            // B holds the matching A element + 1
    }
    memset(ipHost_C, 0, stBytesCount);                  // result starts out zeroed

    // Device buffers. Initialise to NULL and keep each cudaMalloc status so a
    // failed allocation is detected from its return code rather than from an
    // indeterminate pointer value.
    int *ipDevice_A = NULL, *ipDevice_B = NULL, *ipDevice_C = NULL;
    cudaError_t allocA = ErrorCheck(cudaMalloc((int**)&ipDevice_A, stBytesCount), __FILE__, __LINE__);
    cudaError_t allocB = ErrorCheck(cudaMalloc((int**)&ipDevice_B, stBytesCount), __FILE__, __LINE__);
    cudaError_t allocC = ErrorCheck(cudaMalloc((int**)&ipDevice_C, stBytesCount), __FILE__, __LINE__);
    if (allocA != cudaSuccess || allocB != cudaSuccess || allocC != cudaSuccess ||
        ipDevice_A == NULL || ipDevice_B == NULL || ipDevice_C == NULL)
    {
        printf("Device memory allocation failed!\n");
        // Release only the device buffers that were actually obtained.
        if (allocA == cudaSuccess) cudaFree(ipDevice_A);
        if (allocB == cudaSuccess) cudaFree(ipDevice_B);
        if (allocC == cudaSuccess) cudaFree(ipDevice_C);
        free(ipHost_A);
        free(ipHost_B);
        free(ipHost_C);
        exit(1);
    }

    // Upload the inputs and the zeroed result buffer.
    bool ok =
        ErrorCheck(cudaMemcpy(ipDevice_A, ipHost_A, stBytesCount, cudaMemcpyHostToDevice), __FILE__, __LINE__) == cudaSuccess &&
        ErrorCheck(cudaMemcpy(ipDevice_B, ipHost_B, stBytesCount, cudaMemcpyHostToDevice), __FILE__, __LINE__) == cudaSuccess &&
        ErrorCheck(cudaMemcpy(ipDevice_C, ipHost_C, stBytesCount, cudaMemcpyHostToDevice), __FILE__, __LINE__) == cudaSuccess;

    if (ok)
    {
        // 4x4 threads per block; round the grid up so every element is covered.
        dim3 block(4, 4);
        dim3 grid((nx + block.x - 1) / block.x, (ny + block.y - 1) / block.y);
        printf("Grid Dim: %d, %d\n", grid.x, grid.y);
        printf("Block Dim: %d, %d\n", block.x, block.y);

        add_matrix<<<grid, block>>>(ipDevice_A, ipDevice_B, ipDevice_C, nx, ny);
        // A kernel launch returns no status itself; launch-configuration
        // errors must be fetched explicitly. The blocking device-to-host copy
        // that follows waits for the kernel and reports execution errors.
        ok = ErrorCheck(cudaGetLastError(), __FILE__, __LINE__) == cudaSuccess &&
             ErrorCheck(cudaMemcpy(ipHost_C, ipDevice_C, stBytesCount, cudaMemcpyDeviceToHost), __FILE__, __LINE__) == cudaSuccess;
    }

    if (ok)
    {
        // Show at most the first 20 elements, never reading past the matrix.
        const int printCount = nxy < 20 ? nxy : 20;
        for (int i = 0; i < printCount; i++)
        {
            printf("id = %d, matrix_A = %d, matrix_B = %d, matrix_C = %d\n", i + 1, ipHost_A[i], ipHost_B[i], ipHost_C[i]);
        }
    }

    free(ipHost_A);
    free(ipHost_B);
    free(ipHost_C);
    ErrorCheck(cudaFree(ipDevice_A), __FILE__, __LINE__);
    ErrorCheck(cudaFree(ipDevice_B), __FILE__, __LINE__);
    ErrorCheck(cudaFree(ipDevice_C), __FILE__, __LINE__);
    return ok ? 0 : 1;
}

上面代码中包含的 common.cuh 的代码是:
```cpp
/******************************************************************
 * Author: Da Liu
 * Date: 2024-07-10
 * File: common.cuh
 *****************************************************************/
    #pragma once
    #include <iostream>
    #include<stdlib.h>
    #include<stdio.h>
    #include<cuda_runtime.h>

cudaError_t ErrorCheck(cudaError_t error_code, const char* filename, int lineNumber);

// Queries how many CUDA devices are available and selects device 0.
// Prints the outcome to stdout. Calls exit(-1) when the device count cannot
// be obtained, when no device is present, or when selecting device 0 fails.
void setGPU(){
    int iDeviceCount = 0;
    cudaError_t err = ErrorCheck(cudaGetDeviceCount(&iDeviceCount), __FILE__, __LINE__);
    if (err != cudaSuccess || iDeviceCount == 0)
    {
        std::cout << "No CUDA-capable device found." << std::endl;
        exit(-1);
    }
    else
    {
        std::cout << "Number of CUDA-capable devices found: " << iDeviceCount << std::endl;
    }

    // Always use the first device.
    int iDevice = 0;
    err = ErrorCheck(cudaSetDevice(iDevice), __FILE__, __LINE__);
    if (err != cudaSuccess)
    {
        std::cout << "Failed to set device " << iDevice << std::endl;
        exit(-1);
    }
    else
    {
        std::cout << "Device " << iDevice << " set successfully." << std::endl;
    }
}

// Reports a failed CUDA runtime call and passes the status through.
//
// When error_code is not cudaSuccess, prints the numeric code, its symbolic
// name, its description, and the source location to stdout. Nothing is
// printed on success.
//
// error_code  status returned by a CUDA runtime call
// filename    source file of the call site (typically __FILE__)
// lineNumber  source line of the call site (typically __LINE__)
// returns     error_code unchanged, so the caller can still branch on it
cudaError_t ErrorCheck(cudaError_t error_code, const char* filename, int lineNumber)
{
    if (error_code != cudaSuccess)
    {
        // The message is a printf-style format string, so it has to go
        // through printf; streaming it would print the %d/%s placeholders
        // literally and run the values together.
        printf("CUDA error:\r\ncode=%d,name=%s,description=%s\r\nfile=%s,line=%d\r\n",
               static_cast<int>(error_code),
               cudaGetErrorName(error_code),
               cudaGetErrorString(error_code),
               filename != NULL ? filename : "(unknown)",
               lineNumber);
    }
    return error_code;
}

此时需要将 grid2D_block2D.cu 编译成可执行文件。在代码所在目录的终端中输入下面的命令(name 替换为实际的文件名,例如 grid2D_block2D;Windows 下运行 ./name.exe,Linux 下运行 ./name):
```sh
nvcc name.cu -o name
./name.exe
```

这样就可以输出结果。
在这里插入图片描述

  • 16
    点赞
  • 15
    收藏
    觉得还不错? 一键收藏
  • 打赏
    打赏
  • 0
    评论
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包

打赏作者

小马敲马

你的鼓励将是我创作的最大动力

¥1 ¥2 ¥4 ¥6 ¥10 ¥20
扫码支付:¥1
获取中
扫码支付

您的余额不足,请更换扫码支付或充值

打赏作者

实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值