安装cuda 9.0后出现没有核心执行或找不到核心的问题，但是cuda8.0可以正常运行

最新推荐文章于 2022-11-20 21:55:56 发布

jifaley

最新推荐文章于 2022-11-20 21:55:56 发布

阅读量872

点赞数

分类专栏：各种各样出错的处理文章标签： cuda

各种各样出错的处理专栏收录该内容

9 篇文章 0 订阅

订阅专栏

windows7 cuda9.0 GT 630M vs2015

同样的代码，使用cuda8.0可以检测到GPU，

而使用cuda9.0却检测不到（cudaGetDeviceCount得到的设备数为0）。本来以为是版本冲突，直到看到下面这篇文章！！

https://blog.csdn.net/zhouyiqi_c/article/details/79121469

原来是计算能力的问题：cuda9.0要求GPU的计算能力至少为3.0，而我的GPU达不到这个要求。

测试代码如下：

// Diagnostic entry point: enumerates the CUDA devices visible to the runtime,
// prints the main cudaDeviceProp fields for each one, and then runs the
// addWithCuda vector-add sample as a smoke test.
//
// A device count of 0 (or an error from cudaGetDeviceCount) is the symptom this
// program is meant to expose, so the runtime's error string is printed instead
// of being silently dropped.
//
// Returns 0 on success, 1 if addWithCuda or cudaDeviceReset fails.
int main()
{
    const int arraySize = 5;
    const int a[arraySize] = { 1, 2, 3, 4, 5 };
    const int b[arraySize] = { 10, 20, 30, 40, 50 };
    int c[arraySize] = { 0 };

    cudaDeviceProp prop;
    //cudaSetDevice(1);

    // Query the number of devices. Initialise count so that a failed query
    // cannot leave it holding garbage that the loop below would iterate over.
    int count = 0;
    cudaError_t cudaStatus = cudaGetDeviceCount(&count);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaGetDeviceCount failed: %s\n", cudaGetErrorString(cudaStatus));
        count = 0;
    }
    printf("GPU_count = %d\n\n", count);

    for (int i = 0; i < count; i++)
    {
        // Fetch the property block for device i; skip the device if the query
        // fails rather than printing an unfilled struct.
        cudaStatus = cudaGetDeviceProperties(&prop, i);
        if (cudaStatus != cudaSuccess) {
            fprintf(stderr, "cudaGetDeviceProperties(%d) failed: %s\n", i, cudaGetErrorString(cudaStatus));
            continue;
        }

        printf("   --- General Information for device %d ---\n", i);
        // Device name
        printf("Name:\t%s\n", prop.name);
        // Compute capability
        printf("Compute capability:\t%d.%d\n", prop.major, prop.minor);
        // Clock rate
        printf("Clock rate:\t%d\n", prop.clockRate);
        // Whether the device can overlap copies with kernel execution
        printf("Device copy overlap:\t");
        printf(prop.deviceOverlap ? "Enable\n" : "Disable\n");
        // Whether kernels are subject to an execution timeout
        printf("Kernel execition timeout:\t");
        printf(prop.kernelExecTimeoutEnabled ? "Enable\n" : "Disable\n");

        printf("   --- Memory Information for device %d ---\n", i);
        // The fields below are size_t; %zu is the matching conversion (%ld
        // truncates where long is 32-bit, e.g. on Windows).
        // Total global memory, in bytes
        printf("Total global Mem:\t%zu\n", prop.totalGlobalMem);
        // Total constant memory, in bytes
        printf("Total constant Mem:\t%zu\n", prop.totalConstMem);
        // Maximum pitch, in bytes, allowed for memory copies
        printf("Max men pitch:\t%zu\n", prop.memPitch);
        // Texture alignment requirement
        printf("Texture Alignment:\t%zu\n", prop.textureAlignment);

        printf("   ---MP Information for device %d ---\n", i);
        // Number of multiprocessors on the device
        printf("Multiprocessor count:\t%d\n", prop.multiProcessorCount);
        // Shared memory available per block, in bytes
        // (the label says "per mp", but the field printed is per-block)
        printf("Shared mem per mp:\t%zu\n", prop.sharedMemPerBlock);
        // 32-bit registers available per block
        // (the label says "per mp", but the field printed is per-block)
        printf("Registers per mp:\t%d\n", prop.regsPerBlock);
        // Threads per warp
        printf("Threads in warp:\t%d\n", prop.warpSize);
        // Maximum threads per block
        printf("Max threads per block:\t%d\n", prop.maxThreadsPerBlock);
        // Maximum size of each block dimension. The arrays have three
        // elements, so the z extent is index 2 (index 3 is out of bounds).
        printf("Max threads dimensions:\t(%d, %d, %d)\n", prop.maxThreadsDim[0], prop.maxThreadsDim[1], prop.maxThreadsDim[2]);
        // Maximum size of each grid dimension
        printf("Max grid dimensions:\t(%d, %d, %d)\n", prop.maxGridSize[0], prop.maxGridSize[1], prop.maxGridSize[2]);
        printf("\n\n");
    }

    // Keep the console window open so the report can be read.
    system("pause");

    // Add vectors in parallel.
    cudaStatus = addWithCuda(c, a, b, arraySize);
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "addWithCuda failed!");
        return 1;
    }

    printf("{1,2,3,4,5} + {10,20,30,40,50} = {%d,%d,%d,%d,%d}\n",
        c[0], c[1], c[2], c[3], c[4]);

    // cudaDeviceReset must be called before exiting in order for profiling and
    // tracing tools such as Nsight and Visual Profiler to show complete traces.
    cudaStatus = cudaDeviceReset();
    if (cudaStatus != cudaSuccess) {
        fprintf(stderr, "cudaDeviceReset failed!");
        return 1;
    }

    system("pause");
    return 0;
}