显卡的相关性能参数含义(struct cudaDeviceProp)

最新推荐文章于 2023-07-28 19:57:39 发布

Jackery_Shh

最新推荐文章于 2023-07-28 19:57:39 发布

阅读量4.8k

点赞数 2

分类专栏：并行运算　文章标签：显卡性能、并行运算、cuda

本文链接：https://blog.csdn.net/gggg_ggg/article/details/48130615

版权

并行运算专栏收录该内容

13 篇文章 0 订阅

订阅专栏

中文译注(英文见下文)：

/*
 * Annotated listing of the CUDA device-property structure.
 *
 * Each member describes one capability or limit of a device. This listing
 * covers only a subset of the members; member order and types are kept
 * exactly as in the original listing.
 */
struct cudaDeviceProp {
    char name[256];               // Name of the device
    size_t totalGlobalMem;        // Size of global memory, in bytes
    size_t sharedMemPerBlock;     // Maximum shared memory a thread block can use, in bytes;
                                  // shared simultaneously by all thread blocks on a multiprocessor
    int regsPerBlock;             // Maximum number of 32-bit registers a thread block can use;
                                  // shared simultaneously by all thread blocks on a multiprocessor
    int warpSize;                 // Warp size, in threads
    size_t memPitch;              // Maximum pitch, in bytes, allowed by the memory-copy functions
                                  // that operate on regions allocated through cudaMallocPitch()
    int maxThreadsPerBlock;       // Maximum number of threads per block
    int maxThreadsDim[3];         // Maximum size of each dimension of a block
    int maxGridSize[3];           // Maximum size of each dimension of a grid
    size_t totalConstMem;         // Size of constant memory
    int major;                    // Major compute-capability number
    int minor;                    // Minor compute-capability number
    int clockRate;                // Clock frequency
    size_t textureAlignment;      // Alignment requirement for textures
    int deviceOverlap;            // Whether the device can run cudaMemcpy() and a kernel concurrently
    int multiProcessorCount;      // Number of multiprocessors on the device
    int kernelExecTimeoutEnabled; // Whether a run-time limit can be imposed on kernel execution
    int integrated;               // Whether this GPU is integrated
    int canMapHostMemory;         // Whether this GPU can map host (CPU) memory into the
                                  // device address space
    int computeMode;              // Compute mode
    int maxTexture1D;             // Maximum 1D texture size
    int maxTexture2D[2];          // Maximum 2D texture dimensions
    int maxTexture3D[3];          // Maximum 3D texture dimensions
    int maxTexture2DArray[3];     // Maximum 2D texture array dimensions
    int concurrentKernels;        // Whether the GPU supports executing multiple kernels concurrently
};

English Version：

/**
 * CUDA device properties.
 *
 * Each member below carries its own description. Member order and types
 * are left exactly as listed.
 *
 * NOTE(review): `__device_builtin__` is not defined anywhere in this
 * listing; presumably it is a macro supplied by the CUDA toolkit headers.
 * Confirm against the toolkit version in use before compiling this
 * definition standalone.
 */
struct __device_builtin__ cudaDeviceProp
{
char name[256]; /**< ASCII string identifying device */
size_t totalGlobalMem; /**< Global memory available on device in bytes */
size_t sharedMemPerBlock; /**< Shared memory available per block in bytes */
int regsPerBlock; /**< 32-bit registers available per block */
int warpSize; /**< Warp size in threads */
size_t memPitch; /**< Maximum pitch in bytes allowed by memory copies */
int maxThreadsPerBlock; /**< Maximum number of threads per block */
int maxThreadsDim[3]; /**< Maximum size of each dimension of a block */
int maxGridSize[3]; /**< Maximum size of each dimension of a grid */
int clockRate; /**< Clock frequency in kilohertz */
size_t totalConstMem; /**< Constant memory available on device in bytes */
int major; /**< Major compute capability */
int minor; /**< Minor compute capability */
size_t textureAlignment; /**< Alignment requirement for textures */
size_t texturePitchAlignment; /**< Pitch alignment requirement for texture references bound to pitched memory */
int deviceOverlap; /**< Device can concurrently copy memory and execute a kernel. Deprecated. Use instead asyncEngineCount. */
int multiProcessorCount; /**< Number of multiprocessors on device */
int kernelExecTimeoutEnabled; /**< Specifies whether there is a run time limit on kernels */
int integrated; /**< Device is integrated as opposed to discrete */
int canMapHostMemory; /**< Device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer */
int computeMode; /**< Compute mode (See ::cudaComputeMode) */
int maxTexture1D; /**< Maximum 1D texture size */
int maxTexture1DMipmap; /**< Maximum 1D mipmapped texture size */
int maxTexture1DLinear; /**< Maximum size for 1D textures bound to linear memory */
int maxTexture2D[2]; /**< Maximum 2D texture dimensions */
int maxTexture2DMipmap[2]; /**< Maximum 2D mipmapped texture dimensions */
int maxTexture2DLinear[3]; /**< Maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory */
int maxTexture2DGather[2]; /**< Maximum 2D texture dimensions if texture gather operations have to be performed */
int maxTexture3D[3]; /**< Maximum 3D texture dimensions */
int maxTexture3DAlt[3]; /**< Maximum alternate 3D texture dimensions */
int maxTextureCubemap; /**< Maximum Cubemap texture dimensions */
int maxTexture1DLayered[2]; /**< Maximum 1D layered texture dimensions */
int maxTexture2DLayered[3]; /**< Maximum 2D layered texture dimensions */
int maxTextureCubemapLayered[2];/**< Maximum Cubemap layered texture dimensions */
int maxSurface1D; /**< Maximum 1D surface size */
int maxSurface2D[2]; /**< Maximum 2D surface dimensions */
int maxSurface3D[3]; /**< Maximum 3D surface dimensions */
int maxSurface1DLayered[2]; /**< Maximum 1D layered surface dimensions */
int maxSurface2DLayered[3]; /**< Maximum 2D layered surface dimensions */
int maxSurfaceCubemap; /**< Maximum Cubemap surface dimensions */
int maxSurfaceCubemapLayered[2];/**< Maximum Cubemap layered surface dimensions */
size_t surfaceAlignment; /**< Alignment requirements for surfaces */
int concurrentKernels; /**< Device can possibly execute multiple kernels concurrently */
int ECCEnabled; /**< Device has ECC support enabled */
int pciBusID; /**< PCI bus ID of the device */
int pciDeviceID; /**< PCI device ID of the device */
int pciDomainID; /**< PCI domain ID of the device */
int tccDriver; /**< 1 if device is a Tesla device using TCC driver, 0 otherwise */
int asyncEngineCount; /**< Number of asynchronous engines */
int unifiedAddressing; /**< Device shares a unified address space with the host */
int memoryClockRate; /**< Peak memory clock frequency in kilohertz */
int memoryBusWidth; /**< Global memory bus width in bits */
int l2CacheSize; /**< Size of L2 cache in bytes */
int maxThreadsPerMultiProcessor;/**< Maximum resident threads per multiprocessor */
int streamPrioritiesSupported; /**< Device supports stream priorities */
int globalL1CacheSupported; /**< Device supports caching globals in L1 */
int localL1CacheSupported; /**< Device supports caching locals in L1 */
size_t sharedMemPerMultiprocessor; /**< Shared memory available per multiprocessor in bytes */
int regsPerMultiprocessor; /**< 32-bit registers available per multiprocessor */
int managedMemory; /**< Device supports allocating managed memory on this system */
int isMultiGpuBoard; /**< Device is on a multi-GPU board */
int multiGpuBoardGroupID; /**< Unique identifier for a group of devices on the same multi-GPU board */
};