初步学习CUDA编程,实现了简单的稀疏矩阵向量乘法运算。由于硬件限制,目前只测试了单精度程序。
GPU计算子程序gpu_fmmv.cu:
#include <stdio.h>
#include <stdlib.h>
// CUDA-C includes
#include <cuda_runtime.h>
#ifdef __cplusplus
extern "C" {
#endif
// Fortran interface.
// The macro below maps the name GPU_fmmv to the symbol gpu_fmmv_ (lowercase
// with a trailing underscore); the extern "C" wrapper requests C linkage for
// that symbol when this file is compiled as C++.
// NOTE(review): the lowercase + trailing-underscore naming is presumably what
// the calling Fortran compiler expects -- confirm against the build setup.
#define GPU_fmmv gpu_fmmv_
// Prototype of the host entry point. Every argument is passed by pointer:
// three int pointers followed by three float pointers.
// NOTE(review): the types line up with the fmmv kernel's parameters
// (neq, numcol, ia, a, v, w); presumably the arguments are forwarded in that
// order -- verify against the definition of GPU_fmmv.
extern void GPU_fmmv(int *, int *, int *, float *, float *, float *);
#ifdef __cplusplus
}
#endif
// Stride of the row loop in the fmmv kernel: each thread starts at row
// threadIdx.x and advances by THREAD_NUM rows per iteration.
// NOTE(review): presumably this is also the number of threads per block used
// at launch; the launch is not visible here -- confirm that the two agree.
#define THREAD_NUM 512
__global__ static void fmmv(int *neq, int *numcol, int *ia, float *a, float *v, float *w)
{
const int tId = threadIdx.x;
int row, col;
for(row = tId; row < *neq; row += THREAD_NUM){
w[row] = 0.0;
for(int num = numcol[row]; num < numcol[row+1]; num ++){