示意图:
可直接运行,见代码:
#include <cuda_runtime.h>
#include <stdio.h>
#include <stdlib.h>
// Innermost level of the nested TASK -> LINE -> ELE -> PARA hierarchy.
struct PARA
{
    int c; // payload; printtask prints it and then overwrites it through d_para->c
};
// Third level of the hierarchy. Holds its own integer plus two links to the
// PARA below it: one for the copy that main() allocates with malloc and one
// for the copy that main() allocates with cudaMalloc.
struct ELE
{
    int b;        // payload; printtask prints it and then overwrites it
    PARA *h_para; // child allocated with malloc in main(); only main() dereferences it
    PARA *d_para; // child allocated with cudaMalloc in main(); dereferenced by printtask
};
// Second level of the hierarchy. Holds its own integer plus two links to the
// ELE below it: one for the copy that main() allocates with malloc and one
// for the copy that main() allocates with cudaMalloc.
struct LINE
{
    int a;      // payload; printtask prints it and then overwrites it
    ELE *h_ele; // child allocated with malloc in main(); only main() dereferences it
    ELE *d_ele; // child allocated with cudaMalloc in main(); dereferenced by printtask
};
// Root of the hierarchy; this is the object whose device copy is handed to
// printtask. Holds its own integer plus two links to the LINE below it: one
// for the copy that main() allocates with malloc and one for the copy that
// main() allocates with cudaMalloc.
struct TASK
{
    int s;        // payload; printtask prints it and then overwrites it
    LINE *h_line; // child allocated with malloc in main(); only main() dereferences it
    LINE *d_line; // child allocated with cudaMalloc in main(); dereferenced by printtask
};
// Debug kernel: prints the integer stored at each level of the nested
// TASK -> LINE -> ELE -> PARA chain, then overwrites the four integers with
// 11, 12, 13 and 14.
//
// d_task: the TASK to inspect. The kernel dereferences d_task itself and then
//         the d_line, d_ele and d_para links, so all four must be addresses
//         the device can access. The h_* links are never touched here.
//
// There is no thread-index guard: every launched thread executes the same
// prints and the same stores. main() launches it as <<<1, 1>>>.
__global__ void printtask(TASK *d_task)
{
    // Walk down one level at a time, printing the payload found at each level.
    printf("g_s: %d\n", d_task->s);

    LINE *line = d_task->d_line;
    printf("g_a: %d\n", line->a);

    ELE *ele = line->d_ele;
    printf("g_b: %d\n", ele->b);

    PARA *para = ele->d_para;
    printf("g_c: %d\n", para->c);

    // Overwrite every payload so the host can observe the change after copy-back.
    d_task->s = 11;
    line->a = 12;
    ele->b = 13;
    para->c = 14;
}
// Report a failed CUDA runtime call on stderr and terminate the program.
// `what` names the operation so the message identifies the failing step.
static void checkCuda(cudaError_t status, const char *what)
{
    if (status != cudaSuccess)
    {
        fprintf(stderr, "CUDA error (%s): %s\n", what, cudaGetErrorString(status));
        exit(EXIT_FAILURE);
    }
}

// Allocate `bytes` of zero-filled host memory, terminating the program if the
// allocation fails. Zero-filling means the pointer fields of a freshly
// allocated struct do not start out holding indeterminate values.
static void *allocHostOrDie(size_t bytes)
{
    void *mem = calloc(1, bytes);
    if (mem == NULL)
    {
        fprintf(stderr, "host allocation of %zu bytes failed\n", bytes);
        exit(EXIT_FAILURE);
    }
    return mem;
}

// Demonstrates how to pass a multi-level chain of struct pointers
// (TASK -> LINE -> ELE -> PARA) to the GPU.
//
// The recipe is:
//   1. build the hierarchy on the host;
//   2. cudaMalloc one device buffer per level and store each child's device
//      address in the d_* field of its HOST parent;
//   3. only then copy each host struct to its device buffer, so every device
//      copy already carries the device address of its child;
//   4. after the kernel, copy each level back into its host struct.
//
// Returns 0 on success; any allocation or CUDA failure terminates the program
// with EXIT_FAILURE via the helpers above.
int main()
{
    // ---- 1. Host-side hierarchy ----
    TASK *h_task = (TASK *)allocHostOrDie(sizeof(TASK));
    h_task->s = 1;
    h_task->h_line = (LINE *)allocHostOrDie(sizeof(LINE));
    h_task->h_line->a = 2;
    h_task->h_line->h_ele = (ELE *)allocHostOrDie(sizeof(ELE));
    h_task->h_line->h_ele->b = 3;
    h_task->h_line->h_ele->h_para = (PARA *)allocHostOrDie(sizeof(PARA));
    h_task->h_line->h_ele->h_para->c = 4;

    // Keep the host addresses in locals. The copy-back step overwrites the
    // pointer fields inside the host structs with whatever the device copies
    // contain, so the copy-back targets and the cleanup must not be read from
    // those fields afterwards.
    LINE *h_line = h_task->h_line;
    ELE *h_ele = h_line->h_ele;
    PARA *h_para = h_ele->h_para;

    printf("h_s1 : %d\n", h_task->s);
    printf("h_a1 : %d\n", h_line->a);
    printf("h_b1 : %d\n", h_ele->b);
    printf("h_c1 : %d\n", h_para->c);

    // ---- 2. Device buffers, one per level ----
    TASK *d_task = NULL;
    LINE *d_line = NULL;
    ELE *d_ele = NULL;
    PARA *d_para = NULL;
    checkCuda(cudaMalloc((void **)&d_task, sizeof(TASK)), "cudaMalloc TASK");
    checkCuda(cudaMalloc((void **)&d_line, sizeof(LINE)), "cudaMalloc LINE");
    checkCuda(cudaMalloc((void **)&d_ele, sizeof(ELE)), "cudaMalloc ELE");
    checkCuda(cudaMalloc((void **)&d_para, sizeof(PARA)), "cudaMalloc PARA");

    // Record each child's device address in its host parent. This must happen
    // before the parent is copied to the device, otherwise the device copy
    // would carry a stale d_* link.
    h_task->d_line = d_line;
    h_line->d_ele = d_ele;
    h_ele->d_para = d_para;

    // ---- 3. Copy every level host -> device ----
    checkCuda(cudaMemcpy(d_task, h_task, sizeof(TASK), cudaMemcpyHostToDevice), "copy TASK to device");
    checkCuda(cudaMemcpy(d_line, h_line, sizeof(LINE), cudaMemcpyHostToDevice), "copy LINE to device");
    checkCuda(cudaMemcpy(d_ele, h_ele, sizeof(ELE), cudaMemcpyHostToDevice), "copy ELE to device");
    checkCuda(cudaMemcpy(d_para, h_para, sizeof(PARA), cudaMemcpyHostToDevice), "copy PARA to device");

    printtask<<<1, 1>>>(d_task);
    // A kernel launch returns no status itself: launch failures are reported
    // by cudaGetLastError(), faults during execution by the synchronize call.
    checkCuda(cudaGetLastError(), "printtask launch");
    checkCuda(cudaDeviceSynchronize(), "printtask execution");

    // ---- 4. Copy every level device -> host ----
    checkCuda(cudaMemcpy(h_task, d_task, sizeof(TASK), cudaMemcpyDeviceToHost), "copy TASK to host");
    checkCuda(cudaMemcpy(h_line, d_line, sizeof(LINE), cudaMemcpyDeviceToHost), "copy LINE to host");
    checkCuda(cudaMemcpy(h_ele, d_ele, sizeof(ELE), cudaMemcpyDeviceToHost), "copy ELE to host");
    checkCuda(cudaMemcpy(h_para, d_para, sizeof(PARA), cudaMemcpyDeviceToHost), "copy PARA to host");

    printf("h_s2 : %d\n", h_task->s);
    printf("h_a2 : %d\n", h_line->a);
    printf("h_b2 : %d\n", h_ele->b);
    printf("h_c2 : %d\n", h_para->c);

    // ---- Cleanup: release device buffers, then host memory, leaf first ----
    checkCuda(cudaFree(d_para), "cudaFree PARA");
    checkCuda(cudaFree(d_ele), "cudaFree ELE");
    checkCuda(cudaFree(d_line), "cudaFree LINE");
    checkCuda(cudaFree(d_task), "cudaFree TASK");
    free(h_para);
    free(h_ele);
    free(h_line);
    free(h_task);
    return 0;
}