CUDA 编程
VS2019+GPU
需要先引入 CUDA 生成自定义项：
右键打开的项目 –> 生成依赖项 –> 生成自定义 –> 勾选 CUDA v10.1。
再右键 .cu 文件 –> 属性，将项类型设置为 CUDA C/C++。
示例：计算 0 到 255 每个整数的立方。
“kernel.cu”
#include <stdio.h>
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
// Element-wise product kernel: c[i] = a[i] * b[i] for every i in [0, size).
// Despite its name the kernel multiplies; the name is kept for existing launch sites.
// Launch layout: 1-D grid of 1-D blocks, one element per thread. The grid may
// contain more threads than elements; the surplus threads do nothing.
__global__ void add(const int* a, const int* b, int* c, int size)
{
    int tid = threadIdx.x + blockIdx.x * blockDim.x;
    // Guard the tail: the grid is rounded up to whole blocks, so the last block
    // can hold threads whose index lies past the end of the arrays.
    if (tid < size)
    {
        c[tid] = b[tid] * a[tid];
    }
}

// Prints a diagnostic to stderr when a CUDA runtime call did not succeed.
// Returns true when `status` is cudaSuccess, false otherwise.
static bool cudaCallSucceeded(cudaError_t status, const char* what)
{
    if (status == cudaSuccess)
    {
        return true;
    }
    fprintf(stderr, "CUDA error in %s: %s\n", what, cudaGetErrorString(status));
    return false;
}

// Host wrapper: computes c[i] = a[i] * b[i] for i in [0, size) on GPU device 0.
//
// Parameters:
//   a, b  - host input arrays holding at least `size` ints each.
//   c     - host output array holding at least `size` ints.
//   size  - number of elements to process; values <= 0 are a no-op.
//
// On any CUDA failure a message is written to stderr, all device buffers are
// released, and `c` is left unmodified.
extern "C" void addKernel(const int* a, const int* b, int* c, int size)
{
    if (size <= 0)
    {
        return;  // nothing to do; a zero-block launch would be an invalid configuration
    }

    const int threadsPerBlock = 128;
    // Ceil-division written so that it cannot overflow for large `size`.
    const int blocks = (size - 1) / threadsPerBlock + 1;
    const size_t bytes = sizeof(int) * (size_t)size;

    int* dev_a = 0;
    int* dev_b = 0;
    int* dev_c = 0;

    // Each step runs only if every previous one succeeded (short-circuit &&).
    // dev_c is not pre-filled from `c`: the kernel overwrites all `size` elements.
    bool ok = cudaCallSucceeded(cudaSetDevice(0), "cudaSetDevice")
        && cudaCallSucceeded(cudaMalloc((void**)&dev_a, bytes), "cudaMalloc(dev_a)")
        && cudaCallSucceeded(cudaMalloc((void**)&dev_b, bytes), "cudaMalloc(dev_b)")
        && cudaCallSucceeded(cudaMalloc((void**)&dev_c, bytes), "cudaMalloc(dev_c)")
        && cudaCallSucceeded(cudaMemcpy(dev_a, a, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(a -> dev_a)")
        && cudaCallSucceeded(cudaMemcpy(dev_b, b, bytes, cudaMemcpyHostToDevice), "cudaMemcpy(b -> dev_b)");

    if (ok)
    {
        add<<<blocks, threadsPerBlock>>>(dev_a, dev_b, dev_c, size);
        // A kernel launch returns nothing; configuration errors are reported by
        // cudaGetLastError(), and execution errors by the blocking copy that follows.
        ok = cudaCallSucceeded(cudaGetLastError(), "add kernel launch")
            && cudaCallSucceeded(cudaMemcpy(c, dev_c, bytes, cudaMemcpyDeviceToHost), "cudaMemcpy(dev_c -> c)");
    }

    // cudaFree accepts a null pointer, so this is safe after a partial setup.
    cudaFree(dev_a);
    cudaFree(dev_b);
    cudaFree(dev_c);
}
“main.cpp”
#include <stdio.h>
#include <stdlib.h>
//#include "kernel.cu"
#define N 256
extern "C" void addKernel(const int* a, const int* b, int* c, int size);
int main(int argc, char** argv)
{
int a[N];
int b[N];
int c[N];
for (int i = 0; i < N; i++)
{
a[i] = i;
b[i] = i * i;
c[i] = 0;
}
addKernel(a, b, c, N);
for (int i = 0; i < N; i++)
{
printf("%d^3=%d\n", i, c[i]);
}
return 0;
}