下面是我的问题代码:
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include "device_functions.h"
#include
#include
#define W 25
#define H 25
// 核函数
__global__ void kernel(int* a, size_t pitch)
{
int x = threadIdx.x;
int y = threadIdx.y;
int *row_a = (int*)((char*)a + y * pitch);
// Clear to zero
row_a[x] = 0;
}
int main()
{
int **a;
int *dev_a;
size_t pitch;
dim3 threads(W, H);
// 为a动态分配内存
a = (int**)malloc(H * sizeof(int*));
for (int i = 0; i < H; i++)
{
a[i] = (int*)malloc(W * sizeof(int));
}
// 初始化数组a
for (int i = 0; i < H; i++)
{
for (int j = 0; j < W; j++)
{
a[i][j] = 1;
}
}
printf("修改前的数组内容:\n");
for (int i = 0; i < H; i++)
{
for (int j = 0; j < W; j++)
{
printf("%d ", a[i][j]);
}
printf("\n");
}
// 使用cudaMallocPitch分配设备内存
cudaMallocPitch((void**)&dev_a, &pitch, W * sizeof(int), H);
// 将数组a中的内容拷贝到设备数组dev_a上
cudaMemcpy2D(dev_a, pitch, a, W * sizeof(int), W * sizeof(int), H, cudaMemcpyHostToDevice);
// 调用核函数
kernel<<<1, threads>>>(dev_a, pitch);
// 将结果拷贝回主机
cudaMemcpy2D(a, W * sizeof(int), dev_a, pitch, W * sizeof(int), H, cudaMemcpyDeviceToHost);
printf("修改后的数组内容:\n");
for (int i = 0; i < H; i++)
{
for (int j = 0; j < W; j++)
{
printf("%d ", a[i][j]);
}
printf("\n");
}
return 0;
}
上面代码编译是没有错误的,但是运行时程序会崩溃。当我将动态数组改成静态数组之后,就不会出现这个问题了。希望各位能帮忙解决一下。