话不多说,直接进入主题
cuBLAS 是 CUDA 上用于矩阵运算的库,可以在 GPU 上实现很高的运算效率。然而关于它的使用方法,目前并没有详细的中文资料。
笔者经过多次调试终于成功,在此分享一点儿心得。
#include <iostream>
#include <cstdlib>
#include <cublas_v2.h>
// Multiply the matrices A and B on the GPU with cuBLAS and store the result in C:
//   C(m,n) = A(m,k) * B(k,n)
//
// cuBLAS uses column-major storage, so the leading dimension of each matrix is
// its row count: m for A, k for B and m for C.
//
// Parameters:
//   A, B    - input matrices; expected to be device pointers, since cuBLAS
//             operates on GPU memory (the caller owns the allocations).
//   C       - output matrix, m x n; same memory expectations as A and B.
//   m, k, n - matrix dimensions as in the formula above.
//
// The signature returns void, so failures cannot be propagated to the caller;
// every cuBLAS status is checked and any failure is reported on std::cerr.
void gpu_blas_mmul(const float *A, const float *B, float *C, const int m, const int k, const int n) {
    // cuBLAS rejects a leading dimension smaller than 1, even when the
    // corresponding matrix is empty, so clamp each one to at least 1.
    const int lda = (m > 0) ? m : 1;
    const int ldb = (k > 0) ? k : 1;
    const int ldc = (m > 0) ? m : 1;

    // Scaling factors for C = alpha * A * B + beta * C. With beta == 0 the
    // previous contents of C do not contribute to the result.
    const float alpha = 1.0f;
    const float beta = 0.0f;

    // Create a handle for cuBLAS.
    cublasHandle_t handle;
    cublasStatus_t status = cublasCreate(&handle);
    if (status != CUBLAS_STATUS_SUCCESS) {
        std::cerr << "gpu_blas_mmul: cublasCreate failed with status "
                  << static_cast<int>(status) << std::endl;
        return;  // no handle was created, so there is nothing to destroy
    }

    // Do the actual multiplication (neither operand is transposed).
    status = cublasSgemm(handle, CUBLAS_OP_N, CUBLAS_OP_N, m, n, k,
                         &alpha, A, lda, B, ldb, &beta, C, ldc);
    if (status != CUBLAS_STATUS_SUCCESS) {
        std::cerr << "gpu_blas_mmul: cublasSgemm failed with status "
                  << static_cast<int>(status) << std::endl;
        // Fall through so the handle is still released.
    }

    // Destroy the handle.
    status = cublasDestroy(handle);
    if (status != CUBLAS_STATUS_SUCCESS) {
        std::cerr << "gpu_blas_mmul: cublasDestroy failed with status "
                  << static_cast<int>(status) << std::endl;
    }
}
int main(){
int row1=5;
int column1 &#