这里对矩阵运算做个记录
头文件
头文件
#include <cstdio>
#include <cstdlib>
#include <iostream>
#include <sstream>
#include <string>

#include "cuda_runtime.h"
#include "cublas_v2.h"
辅助函数
// Two error-handling macros
// Report a fatal error and terminate the process.
// Writes `s`, then the call site's file:line, then "Aborting..." to std::cerr,
// calls cudaDeviceReset(), and finally exits with status 1.
// `s` must be convertible to std::string.
// (Comments are kept outside the macro body: a `//` comment on a
// backslash-continued line would swallow the continuation.)
#define fatalError(s) do { \
    std::stringstream _message; \
    _message << std::string(s) + "\n" << __FILE__ << ':' << __LINE__; \
    std::cerr << _message.str() << "\nAborting...\n"; \
    cudaDeviceReset(); \
    exit(1); \
} while(0)
// Check a status code and abort through fatalError() when it is non-zero.
// The argument is evaluated exactly once and stored as an int, so passing a
// call expression directly (e.g. callCuda(cudaMalloc(...))) does not re-run
// the call while the error message is being built.
// The message is "Cuda failure: <numeric status>".
#define callCuda(status) do { \
    const int _statusCode = static_cast<int>(status); \
    if (_statusCode != 0) { \
        std::stringstream _error; \
        _error << "Cuda failure: " << _statusCode; \
        fatalError(_error.str()); \
    } \
} while(0)
//矩阵
// Print an r-by-c matrix stored in column-major order to standard output.
//
// Element (i, j) is read from x[i + j * r]. Each row is printed on its own
// line with every element followed by ", \t"; the matrix is preceded by
// "<name>: " and followed by an empty line.
//
// The element type is a template parameter so that both integer and
// floating-point buffers can be printed; for int data the output is identical
// to the former printf("%d") formatting.
//
// Parameters:
//   x    - pointer to the matrix elements (at least r * c of them are read)
//   n    - element count supplied by the caller; accepted for interface
//          compatibility but not used for bounds checking
//   r    - number of rows
//   c    - number of columns
//   name - label printed before the matrix
template <typename T>
void printMatrix(const T *x, int n, int r, int c, std::string name) {
    (void)n;  // kept in the signature for existing callers
    std::cout << name << ": \n";
    for (int row = 0; row < r; ++row) {
        for (int col = 0; col < c; ++col) {
            std::cout << x[row + col * r] << ", \t";
        }
        std::cout << "\n";
    }
    std::cout << "\n";
}
//随机数据生成
// Fill v[0..n-1] with pseudo-random whole numbers stored as floats.
//
// Each value is rand() % (upper - lower) + lower, i.e. drawn from the
// half-open range [lower, upper). Callers are expected to pass
// lower <= upper. When lower == upper the range is empty, so every element is
// set to `lower` rather than taking a remainder by zero.
//
// Parameters:
//   v     - destination buffer with room for n floats
//   n     - number of elements to generate
//   lower - inclusive lower bound
//   upper - exclusive upper bound
void generateVector(float *v, int n, int lower, int upper) {
    const int span = upper - lower;
    for (int idx = 0; idx < n; ++idx) {
        // span == 0 would make `rand() % span` a division by zero.
        const int offset = (span != 0) ? (rand() % span) : 0;
        v[idx] = (float)(offset + lower);
    }
}
使用 cuBLAS 向量点积函数(cublasSdot)
// Demo: dot product of two random 25-element vectors computed with cuBLAS.
//
// Steps: generate x and y on the host, print them both as vectors and as
// 5x5 matrices, copy them to device memory, call cublasSdot, print the
// result, and release all resources.
//
// Every CUDA / cuBLAS status is stored and then passed to callCuda(), which
// aborts the process on a non-zero status. The status is stored first so the
// API call itself is never expanded more than once inside the macro.
// printVector and printNumber are not defined in this part of the file and
// are assumed to be provided elsewhere.
__host__ void cudaDDotTest2()
{
    printf("---- Demo ans := x .* y ----\n");

    // The vectors are also displayed as rows x cols matrices.
    const int rows = 5;
    const int cols = 5;
    const int n = rows * cols;

    // Bounds for the random test data; generateVector draws from
    // [lower, upper).
    const int lower = 0;
    const int upper = 10;

    cudaError_t cudaStat;
    cublasStatus_t stat;
    cublasHandle_t handle;

    // Host-side input data.
    float *x = new float[n];
    generateVector(x, n, lower, upper);
    printVector(x, n, "x");
    printMatrix(x, n, rows, cols, "matrixX");

    float *y = new float[n];
    generateVector(y, n, lower, upper);
    printVector(y, n, "y");
    printMatrix(y, n, rows, cols, "matriY");

    // Device-side buffers.
    float *d_x = NULL;
    float *d_y = NULL;
    cudaStat = cudaMalloc((void**)&d_x, n * sizeof(*x));
    callCuda(cudaStat);
    cudaStat = cudaMalloc((void**)&d_y, n * sizeof(*y));
    callCuda(cudaStat);

    stat = cublasCreate(&handle);
    callCuda(stat);

    // Host -> device copies with unit stride on both sides.
    stat = cublasSetVector(n, sizeof(*x), x, 1, d_x, 1);
    callCuda(stat);
    stat = cublasSetVector(n, sizeof(*y), y, 1, d_y, 1);
    callCuda(stat);

    // `result` is a host variable; this relies on cuBLAS's default host
    // pointer mode, in which the scalar result is written to host memory.
    float result = 0.0f;
    stat = cublasSdot(handle, n, d_x, 1, d_y, 1, &result);
    callCuda(stat);
    printNumber(&result, "ans");

    // Cleanup.
    cudaStat = cudaFree(d_x);
    callCuda(cudaStat);
    cudaStat = cudaFree(d_y);
    callCuda(cudaStat);
    stat = cublasDestroy(handle);
    callCuda(stat);

    delete[] x;
    delete[] y;
}
测试:这里生成两个 5*5 矩阵(各 25 个元素的向量),在 GPU 上计算它们的点积(对应元素相乘后求和)