其实 CUDA 中的取余运算本身并没有问题(我写过示例,并把结果打印出来验证过);但不知道为什么,加入点源之后,得到的波形呈阶梯状,而且居然没有发散——这是个十分严肃又恐怖的问题。
于是学长反问我能不能改用 circshift 函数来实现,我就试着写了一下,代码如下:
#define _CRT_SECURE_NO_WARNINGS
#include <stdio.h>
#include <stdlib.h>
#include<iostream>
/* Fold an arbitrary shift into the range [0, dim); dim must be > 0. */
static int circshift_wrap(int shift, int dim)
{
    /* % keeps the sign of the dividend, so a negative shift needs one
     * extra step to become a valid non-negative offset. */
    int r = shift % dim;
    return (r < 0) ? r + dim : r;
}

/*
 * Circularly shift a 3-D volume stored as a flat array.
 *
 * Element (i, j, k) lives at index k * xdim * ydim + j * xdim + i, i.e. x
 * varies fastest, then y, then z.  Every input element (i, j, k) is copied
 * to the output position
 *   ((i + xshift) mod xdim, (j + yshift) mod ydim, (k + zshift) mod zdim).
 *
 * out                     destination, at least xdim * ydim * zdim ints
 * in                      source of the same size; must not overlap out,
 *                         because out is written while in is still read
 * xdim, ydim, zdim        extent of each axis; nothing is written when any
 *                         extent is <= 0
 * xshift, yshift, zshift  shift along each axis; may be negative or larger
 *                         than the extent
 */
void circshift(int* out, int* in, int xdim, int ydim, int zdim, int xshift, int yshift, int zshift)
{
    if (xdim <= 0 || ydim <= 0 || zdim <= 0) {
        return;
    }

    const int sx = circshift_wrap(xshift, xdim);
    const int sy = circshift_wrap(yshift, ydim);
    const int sz = circshift_wrap(zshift, zdim);

    /* Offsets are computed in size_t so that large volumes do not overflow
     * int in the flat index. */
    const size_t row = (size_t)xdim;
    const size_t plane = row * (size_t)ydim;

    for (int k = 0; k < zdim; k++) {
        const size_t src_plane = (size_t)k * plane;
        const size_t dst_plane = (size_t)((k + sz) % zdim) * plane;

        for (int j = 0; j < ydim; j++) {
            const size_t src_row = src_plane + (size_t)j * row;
            const size_t dst_row = dst_plane + (size_t)((j + sy) % ydim) * row;

            for (int i = 0; i < xdim; i++) {
                out[dst_row + (size_t)((i + sx) % xdim)] = in[src_row + (size_t)i];
            }
        }
    }
}
/*
 * Print a flat 3-D volume (x fastest, then y, then z) one z-plane at a time:
 * each row of xdim values on its own line, a blank line after every plane.
 */
static void print_volume(const int* data, int xdim, int ydim, int zdim)
{
    for (int k = 0; k < zdim; k++) {
        for (int j = 0; j < ydim; j++) {
            for (int i = 0; i < xdim; i++) {
                int tid2 = k * xdim * ydim + j * xdim + i;
                printf("%3d", data[tid2]);
            }
            printf("\n");
        }
        printf("\n");
    }
}

/*
 * Demo driver: fills a 3 x 4 x 5 volume with 0..n-1, prints it, applies
 * circshift with a z shift of zdim - 1, and prints the shifted volume.
 * Returns 0 on success, 1 if the buffers cannot be allocated.
 */
int main()
{
    int xdim = 3;
    int ydim = 4;
    int zdim = 5;
    int xshift = 0;
    int yshift = 0;
    int zshift = zdim - 1;
    int n = xdim * ydim * zdim;
    size_t size1 = (size_t)n * sizeof(int);

    /* The casts are kept so the file still builds as C++ (it includes <iostream>). */
    int* a = (int*)malloc(size1);
    int* b = (int*)malloc(size1);
    if (a == NULL || b == NULL) {
        fprintf(stderr, "out of memory\n");
        free(a);
        free(b);
        return 1;
    }

    for (int i = 0; i < n; i++) {
        a[i] = i;
    }

    printf("before cirshift\n");
    print_volume(a, xdim, ydim, zdim);
    // Earlier 2-D version of the dump, kept for reference:
    //for (int i = 0; i < xdim; i++) {
    //	for (int j = 0; j < ydim; j++) {
    //		int tid = i * ydim + j;
    //		printf("%3d", a[tid]);
    //	}
    //	printf("\n");
    //}

    circshift(b, a, xdim, ydim, zdim, xshift, yshift, zshift);

    printf("after cirshift\n");
    print_volume(b, xdim, ydim, zdim);
    // Earlier 2-D version of the dump, kept for reference:
    //for (int i = 0; i < xdim; i++) {
    //	for (int j = 0; j < ydim; j++) {
    //		int tid = i * ydim + j;
    //		printf("%3d", b[tid]);
    //	}
    //	printf("\n");
    //}

    free(a);
    free(b);
    return 0;
}
中间被注释掉的代码,是最初先用 2D 数组试验时写的;2D 和 3D 的做法基本相同。
以 z 方向为例,zshift = 1 和 zshift = zdim+1 得到的结果相同(移位量会对 zdim 取余)。
zshift = zdim-1 时,相当于沿 z 方向反向循环移动一层(第 k 层移到第 k-1 层,第 0 层绕回到最后一层)。