CUDA 学习笔记
标签(空格分隔): 学习笔记
一、学习平台
1.1学习平台搭建与学习平台基本信息
1.1.1学习平台搭建
本文档基于 NVIDIA 公司出品的 GeForce GTX 950 GPU 整理。在电脑主机中安装好独立显卡之后,将 CUDA 7.0 安装到软件盘(CUDA 安装包自带显卡驱动,不用再单独安装显卡驱动程序)。然后在 Visual Studio(VS)下新建 CUDA 工程,就可以编写 CUDA 程序了。
1.1.2学习平台基本信息
编写 CUDA 程序时,源文件应包含头文件 "cuda_runtime.h" 和 "device_launch_parameters.h"(下面的示例用到了 printf,因此还需包含 <stdio.h>)。以下代码用于查询每块 GPU 的基本属性,包括计算能力(compute capability)、时钟频率、显存信息以及线程块/网格的规模限制:
int main()
{
cudaDeviceProp prop;
int count;
cudaGetDeviceCount(&count);
for (int i = 0; i < count; ++i){
cudaGetDeviceProperties(&prop, i);
printf(" --- Genaral Information for Device %d ---\n", i);
printf("Name : %s\n",prop.name);
printf("Compute capability : %d.%d\n", prop.major, prop.minor);
printf("Clock rate: %d\n",prop.clockRate);
printf("Device copy overlap: ");
if (prop.deviceOverlap){
printf("Enabled\n");
}
else{
printf("Disabled\n");
}
printf("Kernel execition timeout : ");
if (prop.kernelExecTimeoutEnabled)
printf("Enabled\n");
else
printf("Disabled\n");
printf(" ---Memory Information for Device %d ---\n",i);
printf("Total global mem: %ld\n",prop.totalGlobalMem);
printf("Total const mem : %ld\n", prop.totalConstMem);
printf("Max mem pitch : %ld\n", prop.memPitch);
printf("Texture Alignment : %ld\n",prop.textureAlignment);
printf(" ---MP Information for device %d ---\n", i);
printf("Multiprocessor count : %d\n", prop.multiProcessorCount);
printf("shared mem per mp: %d\n", prop.sharedMemPerBlock);
printf("Register per mp: %d\n",prop.regsPerBlock);
printf("Threads in warp: %d\n", prop.warpSize);
printf(" Max threads per block :%d\n", prop.maxThreadsPerBlock);
printf("Max thread dimentions : (%d, %d, %d)\n", prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
printf("Max grid dimensions:(%d, %d, %d)\n", prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2] );