cublas是cuda的一个线性代数库,cublas_api.h声明了关于线性代数的函数,自己边看边记录一下,以备以后用
简单总结一下:cublas只是简单的矩阵运算,只有几个函数涉及解方程的内容,有一个地方提到了LU分解,内容有限
1 从cpu的vector复制n个元素到GPU的memory
cublasStatus_t CUBLASWINAPI cublasSetVector (int n, int elemSize, const void *x,
int incx, void *devicePtr, int incy);
//异步:数据还没有全部传输到GPU,CPU就重新获得了控制权
cublasStatus_t CUBLASWINAPI cublasSetVectorAsync (int n, int elemSize,
const void *hostPtr, int incx,
void *devicePtr, int incy,
cudaStream_t stream);
2 从GPU的vector复制n个元素到CPU的memory
cublasStatus_t CUBLASWINAPI cublasGetVector (int n, int elemSize, const void *x,
int incx, void *y, int incy);
//asynchronously
cublasStatus_t CUBLASWINAPI cublasGetVectorAsync (int n, int elemSize,
const void *devicePtr, int incx,
void *hostPtr, int incy,
cudaStream_t stream);
3 从CPU的一个矩阵A中复制cols*rows个元素到GPU的矩阵B中
cublasStatus_t CUBLASWINAPI cublasSetMatrix (int rows, int cols, int elemSize,
const void *A, int lda, void *B,
int ldb);
//asynchronously
cublasStatus_t CUBLASWINAPI cublasSetMatrixAsync (int rows, int cols, int elemSize,
const void *A, int lda, void *B,
int ldb, cudaStream_t stream);
4 从GPU的一个矩阵A中复制rows*cols个元素到CPU的矩阵B中
cublasStatus_t CUBLASWINAPI cublasGetMatrix (int rows, int cols, int elemSize,
const void *A, int lda, void *B,
int ldb);
//asynchronously
cublasStatus_t CUBLASWINAPI cublasGetMatrixAsync (int rows, int cols, int elemSize,
const void *A, int lda, void *B,
int ldb, cudaStream_t stream);
5 设置和获取cuBLAS library的流
cublasStatus_t
cublasSetStream(cublasHandle_t handle, cudaStream_t streamId)
cublasStatus_t
cublasGetStream(cublasHandle_t handle, cudaStream_t *streamId)
6 设置和获取PointerMode,即标量参数(如alpha、beta)以及标量结果的指针指向的是host内存还是device内存
cublasStatus_t
cublasSetPointerMode(cublasHandle_t handle, cublasPointerMode_t mode)
cublasStatus_t
cublasGetPointerMode(cublasHandle_t handle, cublasPointerMode_t *mode)
7 有些routines可以设置原子操作
cublasStatus_t cublasSetAtomicsMode(cublasHandle_t handle, cublasAtomicsMode_t mode)
cublasStatus_t cublasGetAtomicsMode(cublasHandle_t handle, cublasAtomicsMode_t *mode)
8 设置和获取是否为tensor core操作
cublasStatus_t cublasSetMathMode(cublasHandle_t handle, cublasMath_t mode)
cublasStatus_t cublasGetMathMode(cublasHandle_t handle, cublasMath_t *mode)
9 获取绝对值最大/最小的元素的下标(结果是下标而不是元素值,下标从1开始)
cublasStatus_t cublasIsamax(cublasHandle_t handle, int n,
const float *x, int incx, int *result)
cublasStatus_t cublasIdamax(cublasHandle_t handle, int n,
const double *x, int incx, int *result)
cublasStatus_t cublasIcamax(cublasHandle_t handle, int n,
const cuComplex *x, int incx, int *result)
cublasStatus_t cublasIzamax(cublasHandle_t handle, int n,
const cuDoubleComplex *x, int incx, int *result)
cublasStatus_t cublasIsamin(cublasHandle_t handle, int n,
const float *x, int incx, int *result)
cublasStatus_t cublasIdamin(cublasHandle_t handle, int n,
const double *x, int incx, int *result)
cublasStatus_t cublasIcamin(cublasHandle_t handle, int n,
const cuComplex *x, int incx, int *result)
cublasStatus_t cublasIzamin(cublasHandle_t handle, int n,
const cuDoubleComplex *x, int incx, int *result)
10 求各元素绝对值之和
cublasStatus_t cublasSasum(cublasHandle_t handle, int n,
const float *x, int incx, float *result)
cublasStatus_t cublasDasum(cublasHandle_t handle, int n,
const double *x, int incx, double *result)
cublasStatus_t cublasScasum(cublasHandle_t handle, int n,