对函数 cblas_sgemm的说明

最新推荐文章于 2024-08-29 09:41:55 发布

zhuiqiuk

最新推荐文章于 2024-08-29 09:41:55 发布

阅读量6.7k

点赞数

文章标签： android studio

计算矩阵乘法的函数之一是 cblas_sgemm，使用单精度实数，另外还有对应双精度实数，单精度复数和双精度复数的函数。在此以 cblas_sgemm为例。

函数定义为：

void cblas_sgemm(const enum CBLAS_ORDER Order, const enum CBLAS_TRANSPOSE TransA,

const enum CBLAS_TRANSPOSE TransB, const int M, const int N,

const int K, const float alpha, const float *A,

const int lda, const float *B, const int ldb,

const float beta, float *C, const int ldc)

得到的结果是:

C = alpha*op( A )*op( B ) + beta*C

const enum CBLAS_ORDER Order，这是指的数据的存储形式，在CBLAS的函数中无论一维还是二维数据都是用一维数组存储，这就要涉及是行主序还是列主序，在C语言中数组是用行主序，fortran中是列主序。我还是习惯于是用行主序，所以这个参数是用CblasRowMajor，如果是列主序的话就是 CblasColMajor。

const int M，矩阵A的行，矩阵C的行

const int N，矩阵B的列，矩阵C的列

const int K，矩阵A的列，矩阵B的行

const float alpha， const float beta，计算公式中的两个参数值，如果只是计算C=A*B，则alpha=1,beta=0

const float *A， const float *B， const float *C，矩阵ABC的数据

const int lda， const int ldb， const int ldc，在BLAS的文档里，这三个参数分别为ABC的行数，但是实际使用发现，在CBLAS里应该是列数。

我在这里计算两个简单矩阵的乘法。

1,2,3

4,5,6

7,8,9

8,7,6

5,4

3,2

1,0

程序代码：

//因为程序是C++，而CBLAS是C语言写的，所以在此处用extern关键字

extern "C"

{
 #include  

} 

#include 

using namespace std;

int main(void) {
 

 const enum CBLAS_ORDER Order=CblasRowMajor;
 const enum CBLAS_TRANSPOSE TransA=CblasNoTrans;
 const enum CBLAS_TRANSPOSE TransB=CblasNoTrans;
 const int M=4;//A的行数，C的行数
 const int N=2;//B的列数，C的列数
 const int K=3;//A的列数，B的行数
 const float alpha=1;
 const float beta=0;
 const int lda=K;//A的列
 const int ldb=N;//B的列
 const int ldc=N;//C的列
 const float A[K*M]={1,2,3,4,5,6,7,8,9,8,7,6};
 const float B[K*N]={5,4,3,2,1,0};
 float C[M*N]; 

 

 cblas_sgemm(Order, TransA, TransB, M, N, K, alpha, A, lda, B, ldb, beta,  C, ldc);
 

 for(int i=0;i<</FONT>M;i++)
 {
  for(int j=0;j<</FONT>N;j++)
  {
   cout<<C[i*N+j]<<"\t";
  }
  cout<<endl;
 } 

 return EXIT_SUCCESS;

}

#include <stdio.h>
#include <string.h>
#include <ctype.h>
#include <stdarg.h>
#include <math.h>

#include "mkl_cblas.h"
//#include <stdlib.h>
#include "mkl.h"
#define MAX_GRP_COUNT 5

int main()
{
float *A, *B, *C;
int m, n, k, i, j;
float alpha, beta;
printf("\n This example computes real matrix C=alpha*A*B+beta*C using \n"
" Intel(R) MKL function dgemm, where A, B, and C are matrices and \n"
" alpha and beta are double precision scalars\n\n");
//m = 2000, k = 200, n = 1000;
m = 2, k = 4, n = 3;
printf(" Initializing data for matrix multiplication C=A*B for matrix \n"
" A(%ix%i) and matrix B(%ix%i)\n\n", m, k, k, n);
alpha = 1.0; beta = 0.0;
printf(" Allocating memory for matrices aligned on 64-byte boundary for better \n"
" performance \n\n");
A = (float *)mkl_malloc(m*k*sizeof(float), 64);
B = (float *)mkl_malloc(k*n*sizeof(float), 64);
C = (float *)mkl_malloc(m*n*sizeof(float), 64);
if (A == NULL || B == NULL || C == NULL) {
printf("\n ERROR: Can't allocate memory for matrices. Aborting... \n\n");
mkl_free(A);
mkl_free(B);
mkl_free(C);
return 1;
}
printf(" Intializing matrix data \n\n");
for (i = 0; i < (m*k); i++) {
A[i] = (float)(i + 1);
}
for (i = 0; i < (k*n); i++) {
B[i] = (float)(-i - 1);
}
for (i = 0; i < (m*n); i++) {
C[i] = 0.0;
}
printf(" Computing matrix product using Intel(R) MKL dgemm function via CBLAS interface \n\n");
cblas_sgemm(CblasRowMajor, CblasNoTrans, CblasTrans, m, n, k, alpha, A, k, B, k, beta, C, n);
printf("\n Computations completed.\n\n");
printf(" Top left corner of matrix A: \n");
for (i = 0; i<min(m, 6); i++) {
for (j = 0; j<min(k, 6); j++) {
printf("%12.0f", A[j + i*k]);
}
printf("\n");
}
printf("\n Top left corner of matrix B: \n");
for (i = 0; i<min(k, 6); i++) {
for (j = 0; j<min(n, 6); j++) {
printf("%12.0f", B[j + i*n]);
}
printf("\n");
}
printf("\n Top left corner of matrix C: \n");
for (i = 0; i<min(m, 6); i++) {
for (j = 0; j<min(n, 6); j++) {
printf("%12.5G", C[j + i*n]);
}
printf("\n");
}
printf("\n Deallocating memory \n\n");
mkl_free(A);
mkl_free(B);
mkl_free(C);
printf(" Example completed. \n\n");
return 0;
}