1. 声明 __shared__ 变量或数组:
// Statically sized array of 256 floats declared with the __shared__ qualifier.
__shared__ float sh_farr[256];
// A single int declared with the __shared__ qualifier.
__shared__ int a;
2. 为结构体中的指针成员分配设备内存:
// Plain record with one scalar and two raw pointer members.
typedef struct Teacher_t
{
    int a;                 // scalar member; not touched by the visible code
    unsigned int *g_mem1;  // buffer of unsigned int; passed to cudaMalloc in initMem
    float *g_mem2;         // buffer of float; presumably also a device buffer -- confirm against initMem
} Teacher;
// Allocate the device buffers referenced by the pointer members of a Teacher.
// @param t         structure whose g_mem1 / g_mem2 members receive the
//                  pointers returned by cudaMalloc
// @param mat_size  number of elements allocated for each buffer
void initMem( Teacher & t, const unsigned int mat_size)
{
    // Keep the byte counts in size_t (the type cudaMalloc takes) so that the
    // product is not truncated to 32 bits for large mat_size.
    const size_t mat_size_ui = sizeof(unsigned int) * mat_size;
    const size_t mat_size_f  = sizeof(float) * mat_size;

    CUDA_SAFE_CALL( cudaMalloc((void**)&t.g_mem1, mat_size_ui) );
    // The float buffer belongs to g_mem2; allocating into g_mem1 a second time
    // would overwrite (and leak) the first allocation and leave g_mem2 unset.
    CUDA_SAFE_CALL( cudaMalloc((void**)&t.g_mem2, mat_size_f) );

    // ... (any further initialization is elided in the original snippet)
}
// NOTE(review): this is a repeated copy of the Teacher struct body; the
// opening "typedef struct Teacher_t" line is not present in this copy.
... {
int a;
unsigned int *g_mem1; // buffer of unsigned int
float *g_mem2; // buffer of float
} Teacher;
// Allocate the device buffers referenced by the pointer members of a Teacher.
// @param t         structure whose g_mem1 / g_mem2 members receive the
//                  pointers returned by cudaMalloc
// @param mat_size  number of elements allocated for each buffer
void initMem( Teacher & t, const unsigned int mat_size)
{
    // Keep the byte counts in size_t (the type cudaMalloc takes) so that the
    // product is not truncated to 32 bits for large mat_size.
    const size_t mat_size_ui = sizeof(unsigned int) * mat_size;
    const size_t mat_size_f  = sizeof(float) * mat_size;

    CUDA_SAFE_CALL( cudaMalloc((void**)&t.g_mem1, mat_size_ui) );
    // The float buffer belongs to g_mem2; allocating into g_mem1 a second time
    // would overwrite (and leak) the first allocation and leave g_mem2 unset.
    CUDA_SAFE_CALL( cudaMalloc((void**)&t.g_mem2, mat_size_f) );

    // ... (any further initialization is elided in the original snippet)
}
3.计时:
// Measure the wall-clock time of a kernel section with the cutil timer API.
unsigned int timer = 0;
CUT_SAFE_CALL( cutCreateTimer( & timer));
CUT_SAFE_CALL( cutStartTimer( timer));
{
    // ... launch the kernel(s) to be timed here
}
// Kernel launches return to the host before the device has finished; wait for
// the device so the host timer covers the execution, not only the launch.
CUDA_SAFE_CALL( cudaThreadSynchronize());
CUT_SAFE_CALL( cutStopTimer( timer));
printf( " Total time: %f ms " , cutGetTimerValue( timer) );
// Release the timer once its value has been read.
CUT_SAFE_CALL( cutDeleteTimer( timer));
// Measure the wall-clock time of a kernel section with the cutil timer API.
// `timer` is the unsigned int handle declared earlier in this snippet.
CUT_SAFE_CALL( cutCreateTimer( & timer));
CUT_SAFE_CALL( cutStartTimer( timer));
{
    // ... launch the kernel(s) to be timed here
}
// Kernel launches return to the host before the device has finished; wait for
// the device so the host timer covers the execution, not only the launch.
CUDA_SAFE_CALL( cudaThreadSynchronize());
CUT_SAFE_CALL( cutStopTimer( timer));
printf( " Total time: %f ms " , cutGetTimerValue( timer) );
// Release the timer once its value has been read.
CUT_SAFE_CALL( cutDeleteTimer( timer));
4. 获取输入命令行中包含的文件名:
////////////////////////////////////////////////////////////////////////////////
//! Check if a particular filename has to be used for the file where the result
//! is stored
//! @param argc      number of command line arguments (from main(argc, argv))
//! @param argv      pointers to command line arguments (from main(argc, argv))
//! @param filename  filename of result file; replaced by a malloc'ed copy of
//!                  the "filename-result" option when that option yields a
//!                  string, otherwise left unchanged
//! @note The previous value of filename is not freed here, because this
//!       function cannot tell whether it was heap-allocated.
////////////////////////////////////////////////////////////////////////////////
void
getResultFilename( int argc, char ** argv, char *& filename)
{
    char* temp = NULL;
    cutGetCmdLineArgumentstr( argc, (const char**) argv, "filename-result", &temp);
    if( NULL != temp)
    {
        // strlen() excludes the terminating '\0' that strcpy() writes, so the
        // buffer needs one extra byte.
        char* copy = (char*) malloc( sizeof(char) * (strlen( temp) + 1));
        // Only replace the caller's filename when the allocation succeeded.
        if( NULL != copy)
        {
            strcpy( copy, temp);
            filename = copy;
        }
        cutFree( temp);
    }
    printf( "Result filename: '%s' ", filename);
}
////////////////////////////////////////////////////////////////////////////////
//! Check if a particular filename has to be used for the file where the result
//! is stored
//! @param argc      number of command line arguments (from main(argc, argv))
//! @param argv      pointers to command line arguments (from main(argc, argv))
//! @param filename  filename of result file; replaced by a malloc'ed copy of
//!                  the "filename-result" option when that option yields a
//!                  string, otherwise left unchanged
//! @note The previous value of filename is not freed here, because this
//!       function cannot tell whether it was heap-allocated.
////////////////////////////////////////////////////////////////////////////////
void
getResultFilename( int argc, char ** argv, char *& filename)
{
    char* temp = NULL;
    cutGetCmdLineArgumentstr( argc, (const char**) argv, "filename-result", &temp);
    if( NULL != temp)
    {
        // strlen() excludes the terminating '\0' that strcpy() writes, so the
        // buffer needs one extra byte.
        char* copy = (char*) malloc( sizeof(char) * (strlen( temp) + 1));
        // Only replace the caller's filename when the allocation succeeded.
        if( NULL != copy)
        {
            strcpy( copy, temp);
            filename = copy;
        }
        cutFree( temp);
    }
    printf( "Result filename: '%s' ", filename);
}
类似的:
////////////////////////////////////////////////////////////////////////////////
//! Check if a specific precision of the eigenvalue has to be obtained
//! @param argc       number of command line arguments (from main(argc, argv))
//! @param argv       pointers to command line arguments (from main(argc, argv))
//! @param precision  precision to use, updated if a strictly positive value
//!                   is specified with the "precision" command line option
////////////////////////////////////////////////////////////////////////////////
void
getPrecision( int argc, char ** argv, float & precision)
{
    // Negative sentinel: only a strictly positive parsed value replaces the
    // caller's precision (presumably temp stays -1 when the option is absent).
    float temp = -1.0f;
    cutGetCmdLineArgumentf( argc, (const char**) argv, "precision", &temp);
    if( temp > 0.0f)
    {
        precision = temp;
    }
    printf( "Precision: %f ", precision);
}
////////////////////////////////////////////////////////////////////////////////
//! Check if a specific precision of the eigenvalue has to be obtained
//! @param argc       number of command line arguments (from main(argc, argv))
//! @param argv       pointers to command line arguments (from main(argc, argv))
//! @param precision  precision to use, updated if a strictly positive value
//!                   is specified with the "precision" command line option
////////////////////////////////////////////////////////////////////////////////
void
getPrecision( int argc, char ** argv, float & precision)
{
    // Negative sentinel: only a strictly positive parsed value replaces the
    // caller's precision (presumably temp stays -1 when the option is absent).
    float temp = -1.0f;
    cutGetCmdLineArgumentf( argc, (const char**) argv, "precision", &temp);
    if( temp > 0.0f)
    {
        precision = temp;
    }
    printf( "Precision: %f ", precision);
}
5. Host 端调用 kernel 函数之后,需要进行同步以等待设备执行完成(kernel 的启动是异步的);而在 __global__ 或 __device__ 函数内部,只需在必要的位置调用 __syncthreads() 即可:
// Host-side synchronization call, wrapped in the CUDA_SAFE_CALL checking macro.
// NOTE(review): cudaThreadSynchronize is deprecated in current CUDA toolkits in
// favour of cudaDeviceSynchronize -- confirm against the toolkit version in use.
CUDA_SAFE_CALL( cudaThreadSynchronize());