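// Written with reference to the book "CUDA C编程权威指南" (Professional CUDA C Programming): transferring a struct between the CPU and the GPU.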
#include "cuda_runtime.h"
#include "device_launch_parameters.h"
#include <stdio.h>
#include<stdlib.h>
// Length of the x and y arrays and side length of the square test matrix in Node.
#define maxnumber 100
// Fixed-size aggregate of int arrays. It contains no pointers, so a single
// flat copy of sizeof(Node) bytes transfers the complete value between host
// and device memory.
typedef struct node {
int x[maxnumber]; // first input array; the kernel stores x[i] + y[i] of the input here in the output
int y[maxnumber]; // second input array; the kernel stores the element index here in the output
int test[maxnumber][maxnumber]; // 2D payload; the kernel zeroes columns 1 and 2 of each row in the output
} Node;
// Element-wise kernel over one Node.
//
// For every index i in [0, maxnumber) it stores:
//   c->x[i]       = a->x[i] + a->y[i]
//   c->y[i]       = i
//   c->test[i][1] = 0
//   c->test[i][2] = 0
// All other elements of *c are left untouched.
//
// Launch layout: 1D grid of 1D blocks. Any configuration that provides at
// least maxnumber threads in total covers every element; surplus threads
// return without touching memory. No shared memory is used.
//
// Parameters:
//   a - device-accessible pointer to the input Node (only read).
//   c - device-accessible pointer to the output Node (only written).
__global__ void gpuAddwith(Node* a,Node*c) {
    // Flat global index: identical to threadIdx.x for a single-block launch,
    // and still unique per thread when several blocks are used.
    int tid = blockIdx.x * blockDim.x + threadIdx.x;

    // Guard against launches with more threads than elements. Without it,
    // thread 100 + k would write c->x[100 + k], which aliases c->y[k] inside
    // the struct and races with the thread that owns index k.
    if (tid >= maxnumber) {
        return;
    }

    c->x[tid] = a->x[tid] + a->y[tid];
    c->y[tid] = tid;
    c->test[tid][1] = 0;
    c->test[tid][2] = 0;
}
// Writes one line per index of the given host-side Node to stdout, showing
// x[i], y[i] and columns 1 and 2 of row i of the test matrix.
void Print(Node* a) {
    int idx = 0;
    while (idx < maxnumber) {
        // Row idx of the 2D matrix; only columns 1 and 2 are reported.
        const int* row = a->test[idx];
        const int xv = a->x[idx];
        const int yv = a->y[idx];
        printf("a[%d]x=%d y=%d test[1] is:%d test[2]is:%d\n", idx, xv, yv, row[1], row[2]);
        ++idx;
    }
}
// Evaluates a CUDA runtime call and terminates the program with a diagnostic
// (file, line, error string) if it does not return cudaSuccess.
#ifndef CUDA_CHECK
#define CUDA_CHECK(call)                                                   \
    do {                                                                   \
        cudaError_t err_ = (call);                                         \
        if (err_ != cudaSuccess) {                                         \
            fprintf(stderr, "CUDA error %s:%d: %s\n", __FILE__, __LINE__,  \
                    cudaGetErrorString(err_));                             \
            exit(EXIT_FAILURE);                                            \
        }                                                                  \
    } while (0)
#endif

// Demo driver: fills one Node on the host, copies it to the device, runs
// gpuAddwith on it, copies the result back and prints the Node before and
// after the kernel.
//
// Returns 0 on success. Returns EXIT_FAILURE if the host allocation fails;
// any failing CUDA call terminates the process via CUDA_CHECK.
int main(){
    // Exactly one Node is transferred, so one Node is all that is allocated.
    const size_t bytes = sizeof(Node);

    Node* h_a = (Node*)malloc(bytes);
    if (h_a == NULL) {
        fprintf(stderr, "malloc of %lu bytes failed\n", (unsigned long)bytes);
        return EXIT_FAILURE;
    }

    for (int i = 0; i < maxnumber; i++) {
        h_a->x[i] = i;
        h_a->y[i] = maxnumber - i;
        for (int j = 0; j < maxnumber; j++) {
            h_a->test[i][j] = i * maxnumber + j;
        }
    }
    Print(h_a);

    Node* d_a = NULL;
    Node* d_c = NULL;
    CUDA_CHECK(cudaMalloc((void**)&d_a, bytes));
    CUDA_CHECK(cudaMalloc((void**)&d_c, bytes));

    CUDA_CHECK(cudaMemcpy(d_a, h_a, bytes, cudaMemcpyHostToDevice));
    // The output is seeded with the input as well: the kernel only writes
    // part of the struct, and the whole struct is copied back over h_a below.
    CUDA_CHECK(cudaMemcpy(d_c, h_a, bytes, cudaMemcpyHostToDevice));

    // One thread per element. maxnumber (100) fits in a single block.
    gpuAddwith<<<1, maxnumber>>>(d_a, d_c);
    // Kernel launches return no status; a bad launch configuration is
    // reported here.
    CUDA_CHECK(cudaGetLastError());

    // Blocking copy on the default stream: waits for the kernel to finish
    // and reports any error raised while it was running.
    CUDA_CHECK(cudaMemcpy(h_a, d_c, bytes, cudaMemcpyDeviceToHost));
    Print(h_a);

    CUDA_CHECK(cudaFree(d_a));
    CUDA_CHECK(cudaFree(d_c));
    free(h_a);
    return 0;
}