cuda编程常用代码示例
龚慧林
积极、乐观、努力
展开
-
cuda编程入门示例1---两个向量对应元素相乘
#include #include #include #include #include #define MIN(a, b) ((a) float* h_A;float* h_B;float* h_C;float* d_A;float* d_B;float* d_C;__global__ void DotMulVet(const原创 2016-11-19 11:03:44 · 3396 阅读 · 2 评论 -
cuda编程入门示例15
#include #include #include #include #define BLOCK_SIZE 16static void HandleError(cudaError_t err, const char *file, int line){ if (err != cudaSuccess) { printf("%s in %s at line %d\n", cuda原创 2016-11-19 16:59:57 · 462 阅读 · 0 评论 -
cuda编程入门示例16
#include #include #include #include #include #define BLOCK_SIZE 16static void HandleError(cudaError_t err, const char *file, int line){ if (err != cudaSuccess) { printf("%s in %s at line %原创 2016-11-19 19:19:16 · 416 阅读 · 0 评论 -
cuda编程入门示例17
#include #include #include #include #include #define BLOCK_SIZE 16static void HandleError(cudaError_t err, const char *file, int line){ if (err != cudaSuccess) { printf("%s in %s at line %原创 2016-11-19 19:35:59 · 324 阅读 · 0 评论 -
cuda编程入门示例18
#include #include #include #include #include #define BLOCK_SIZE 16static void HandleError(cudaError_t err, const char *file, int line){ if (err != cudaSuccess) { printf("%s in %s at line %原创 2016-11-19 19:42:17 · 424 阅读 · 0 评论 -
cuda编程入门示例19---矩阵相乘
#include #include #include #include #include #define BLOCK_SIZE 16static void HandleError(cudaError_t err, const char *file, int line){ if (err != cudaSuccess) { printf("%s in %s at line %转载 2016-11-19 20:04:48 · 500 阅读 · 0 评论 -
cuda编程入门示例20
矩阵相乘,并利用 Kahan's Summation Formula(Kahan 求和公式)来提高计算精度。转载 2016-11-19 20:10:57 · 381 阅读 · 0 评论 -
cuda编程入门示例21
利用 Kahan's Summation Formula 来提高精确度,并且利用块内共享内存存储矩阵dev_a的一行,一共n块。转载 2016-11-19 20:16:41 · 1009 阅读 · 0 评论 -
cuda编程入门示例22
利用 Kahan's Summation Formula 来提高精确度,并且利用块内共享内存存储矩阵dev_a的一行,一共n块;显存地址自动对齐,可以提高访问显存的效率。转载 2016-11-19 20:22:53 · 420 阅读 · 0 评论 -
cuda编程入门示例23
利用 Kahan's Summation Formula 来提高精确度,主机端使用分页锁定存储器提高性能,并且利用块内共享内存存储矩阵dev_a的一行,一共n块;显存地址自动对齐,可以提高访问显存的效率。很明显,程序性能有大幅度提升。转载 2016-11-19 20:26:57 · 423 阅读 · 0 评论 -
cuda编程入门示例25
利用 Kahan's Summation Formula 来提高精确度,主机端使用分页锁定存储器提高性能,并且利用块内共享内存存储矩阵dev_a的一行,一共n块;显存地址自动对齐,可以提高访问显存的效率。很明显,程序性能有大幅度提升。转载 2016-11-19 20:43:29 · 400 阅读 · 0 评论 -
cuda编程入门示例14
#include #include #include #include #define BLOCK_SIZE 16static void HandleError(cudaError_t err, const char *file, int line){ if (err != cudaSuccess) { printf("%s in %s at line %d\n", cuda原创 2016-11-19 16:48:06 · 300 阅读 · 0 评论 -
cuda编程入门示例13
#include #include #include #include #define BLOCK_SIZE 16static void HandleError(cudaError_t err, const char *file, int line){ if (err != cudaSuccess) { printf("%s in %s at line %d\n", cuda原创 2016-11-19 16:44:09 · 343 阅读 · 0 评论 -
cuda编程入门示例12
#include #include #include #include #define BLOCK_SIZE 16static void HandleError(cudaError_t err, const char *file, int line){ if (err != cudaSuccess) { printf("%s in %s at line %d\n", cuda原创 2016-11-19 16:34:29 · 406 阅读 · 0 评论 -
cuda编程入门示例2---CUDA environment initialization
#include #include #include #define BLOCK_SIZE 16static void HandleError(cudaError_t err, const char *file, int line){ if (err != cudaSuccess) { printf("%s in %s at line %d\n", cudaGetErrorSt原创 2016-11-19 15:23:56 · 740 阅读 · 0 评论 -
cuda编程入门示例4---数组元素求和+计时
#include #include #include #include #define BLOCK_SIZE 16static void HandleError(cudaError_t err, const char *file, int line){ if (err != cudaSuccess) { printf("%s in %s at line %d\n", cuda原创 2016-11-19 15:43:04 · 1067 阅读 · 3 评论 -
cuda编程入门示例3---数组求和
#include #include #include #define BLOCK_SIZE 16static void HandleError(cudaError_t err, const char *file, int line){ if (err != cudaSuccess) { printf("%s in %s at line %d\n", cudaGetErrorSt原创 2016-11-19 15:34:32 · 997 阅读 · 0 评论 -
cuda编程入门示例6
#include #include #include #include #define BLOCK_SIZE 16static void HandleError(cudaError_t err, const char *file, int line){ if (err != cudaSuccess) { printf("%s in %s at line %d\n", cuda原创 2016-11-19 15:56:25 · 398 阅读 · 0 评论 -
cuda编程入门示例5
#include #include #include #include #define BLOCK_SIZE 16static void HandleError(cudaError_t err, const char *file, int line){ if (err != cudaSuccess) { printf("%s in %s at line %d\n", cuda原创 2016-11-19 15:52:56 · 509 阅读 · 0 评论 -
cuda编程入门示例7
#include #include #include #include #define BLOCK_SIZE 16static void HandleError(cudaError_t err, const char *file, int line){ if (err != cudaSuccess) { printf("%s in %s at line %d\n", cuda原创 2016-11-19 16:06:42 · 431 阅读 · 0 评论 -
cuda编程入门示例8
#include #include #include #include #define BLOCK_SIZE 16static void HandleError(cudaError_t err, const char *file, int line){ if (err != cudaSuccess) { printf("%s in %s at line %d\n", cuda原创 2016-11-19 16:09:40 · 1909 阅读 · 0 评论 -
cuda编程入门示例9
#include #include #include #include #define BLOCK_SIZE 16static void HandleError(cudaError_t err, const char *file, int line){ if (err != cudaSuccess) { printf("%s in %s at line %d\n", cuda原创 2016-11-19 16:16:17 · 594 阅读 · 0 评论 -
cuda编程入门示例10
#include #include #include #include #define BLOCK_SIZE 16static void HandleError(cudaError_t err, const char *file, int line){ if (err != cudaSuccess) { printf("%s in %s at line %d\n", cuda原创 2016-11-19 16:22:17 · 486 阅读 · 0 评论 -
cuda编程入门示例11
#include #include #include #include #define BLOCK_SIZE 16static void HandleError(cudaError_t err, const char *file, int line){ if (err != cudaSuccess) { printf("%s in %s at line %d\n", cuda原创 2016-11-19 16:30:41 · 514 阅读 · 0 评论 -
cuda编程入门示例18-1
在cuda编程入门示例18的基础上,仅将块内归约(reduction)手动展开,可以略微提升效率。原创 2016-11-20 15:29:03 · 469 阅读 · 0 评论