基于块的矩阵相乘与常规的相乘的对比试验

基于块的矩阵相乘能够更好地利用缓存机制,因此做如下试验,测试一下它的性能。

测试代码如下:

#include <stdio.h>

#include <iostream>

#include <string>

#include <malloc.h>

#include <memory.h>

#include <stdlib.h>

#include <sys/time.h>

#include <unistd.h>

#include <sys/stat.h>

 

 

using namespace std;

 

/* Dimensions of the full matrices A, B and C: main() loops rows up to
 * MATRIX_X and columns up to MATRIX_Y. */
#define MATRIX_X 1024

#define MATRIX_Y 1024

 

/* Rows and columns of one sub-matrix (tile) used by the blocked multiply. */
#define MATRIX_SUB_X 8

#define MATRIX_SUB_Y 8

 

/* Number of tiles along each dimension; main() addresses tile (i, j) with
 * i < MATRIX_SUBSET_X and j < MATRIX_SUBSET_Y (128 * 8 == 1024). */
#define MATRIX_SUBSET_X 128

#define MATRIX_SUBSET_Y 128

 

/* Non-zero: main() declares the matrices as local arrays;
 * zero: main() obtains them from the heap. */
#define USING_STACK 0

 

int main()

{

    struct timeval tv_start, tv_end;

#if USING_STACK

    int A[MATRIX_X][MATRIX_Y], B[MATRIX_X][MATRIX_Y], C[MATRIX_X][MATRIX_Y];

    int A_sub[MATRIX_SUBSET_X][MATRIX_SUBSET_Y][MATRIX_SUB_X][MATRIX_SUB_Y];

    int B_sub[MATRIX_SUBSET_X][MATRIX_SUBSET_Y][MATRIX_SUB_X][MATRIX_SUB_Y];

    int C_sub[MATRIX_SUBSET_X][MATRIX_SUBSET_Y][MATRIX_SUB_X][MATRIX_SUB_Y];

    int M_sub[MATRIX_SUBSET_X][MATRIX_SUBSET_X][MATRIX_SUBSET_Y][MATRIX_SUB_X][MATRIX_SUB_Y];

#else

    int *A = (int*)malloc(MATRIX_X*MATRIX_Y*sizeof(int));

    int *B = (int*)malloc(MATRIX_X*MATRIX_Y*sizeof(int));

    int *C = (int *)malloc(MATRIX_X*MATRIX_Y*sizeof(int));

    int *A_sub = (int*)malloc(MATRIX_SUBSET_X * MATRIX_SUBSET_Y * MATRIX_SUB_X*MATRIX_SUB_Y * sizeof(int));

    int *B_sub = (int*)malloc(MATRIX_SUBSET_X * MATRIX_SUBSET_Y * MATRIX_SUB_X*MATRIX_SUB_Y * sizeof(int));

    int *C_sub = (int*)malloc(MATRIX_SUBSET_X * MATRIX_SUBSET_Y * MATRIX_SUB_X*MATRIX_SUB_Y * sizeof(int));

    int *M_sub = (int*)malloc(MATRIX_SUBSET_X * MATRIX_SUBSET_X * MATRIX_SUBSET_Y * MATRIX_SUB_X * MATRIX_SUB_Y *sizeof(int));

    printf("0x%016x, 0x%016x, 0x%016x, 0x%016x\n", A_sub, B_sub, C_sub, M_sub);

#endif

 

    for(int i = 0; i < MATRIX_X; i++)

    {

        for(int j = 0; j < MATRIX_Y; j++)

        {

#if USING_STACK

            A[i][j] = i * MATRIX_X + j;

            B[i][j] = i * MATRIX_X + j;

            C[i][j] = 0;

#else

            *(A + i*MATRIX_Y + j) = i * MATRIX_X + j;

            *(B + i*MATRIX_Y + j) = i * MATRIX_X + j;

            *(C + i*MATRIX_Y + j) = 0;

#endif

        }

    }

    gettimeofday(&tv_start, 0);

    for(int i = 0; i < MATRIX_X; i++)

    {

        for(int j = 0; j < MATRIX_Y; j++)

        {

            for(int k = 0; k < MATRIX_Y; k++)

            {

#if USING_STACK

                C[i][j] += A[i][k]*B[k][j];

#else

                *(C+i*MATRIX_Y +j) += *(A+i*MATRIX_Y+k)*(*(B+k*MATRIX_Y+j));

#endif

            }

        }

 

    }

    gettimeofday(&tv_end, 0);

    printf("%s(%d):  cost %ldms\n", __FILE__, __LINE__,  (tv_end.tv_sec * 1000000 + tv_end.tv_usec - tv_start.tv_sec * 1000000 - tv_start.tv_usec)/1000);

 

    /divide A and B to sub matrix

    /divide A and B to sub matrix

    for(int i = 0; i < MATRIX_SUBSET_X; i++)

    {

        for(int j = 0; j < MATRIX_SUBSET_Y; j++)

        {

#if USING_STACK

#else

            int *pA_sub = A_sub + (MATRIX_SUB_X*MATRIX_SUB_Y)*(i*MATRIX_SUBSET_Y+j);

            int *pB_sub = B_sub + (MATRIX_SUB_X*MATRIX_SUB_Y)*(i*MATRIX_SUBSET_Y+j);

#endif

            for(int k = 0; k < MATRIX_SUB_X; k++)

            {

                for(int m = 0; m < MATRIX_SUB_Y; m++)

                {

                    int x = i*MATRIX_SUB_X + k;

                    int y = j*MATRIX_SUBSET_Y + m;

#if USING_STACK

                    A_sub[i][j][k][m] = A[x][y];

                    B_sub[i][j][k][m] = B[x][y];

#else

                    *(pA_sub + k*MATRIX_SUB_Y +m) = *(A+x*MATRIX_Y+y);

                    *(pB_sub + k*MATRIX_SUB_Y +m) = *(B+x*MATRIX_Y+y);

#endif

                }

            }

        }

 

    }

 //   memset(M_sub ,0, sizeof(int)*MATRIX_SUB_X*MATRIX_SUB_Y*MATRIX_SUBSET_X*MATRIX_SUBSET_X*MATRIX_SUBSET_Y);

  //  memset(C_sub ,0, sizeof(int)*MATRIX_SUB_X*MATRIX_SUB_Y*MATRIX_SUBSET_X*MATRIX_SUBSET_Y);

 

    gettimeofday(&tv_start, 0);

    for(int i = 0; i < MATRIX_SUBSET_X; i++)

    {

        for(int j = 0; j < MATRIX_SUBSET_Y; j++)

        {

            for(int t = 0; t < MATRIX_SUBSET_Y; t++)

            {

#if USING_STACK

#else

            int *pA_sub = A_sub + (MATRIX_SUB_X*MATRIX_SUB_Y)*(i*MATRIX_SUBSET_Y+j);

            int *pB_sub = B_sub + (MATRIX_SUB_X*MATRIX_SUB_Y)*(j*MATRIX_SUBSET_Y+t);

            int *pM_sub = M_sub + (MATRIX_SUB_X*MATRIX_SUB_Y)*(i*MATRIX_SUBSET_Y * MATRIX_SUBSET_Y + j*MATRIX_SUBSET_Y+t);

           // printf("0x%016x, 0x%016x,  0x%016x, 0x%016x\n", pA_sub, pB_sub,  pM_sub, M_sub);

#endif

 

                for(int k = 0; k < MATRIX_SUB_X; k++)

                {

                    for(int m = 0; m < MATRIX_SUB_Y; m++)

                    {

                        for(int n = 0; n < MATRIX_SUB_Y; n++)

                        {

#if USING_STACK

                            M_sub[i][j][t][k][m] += A_sub[i][j][k][n]*B_sub[j][t][n][m];

#else

            //                printf("0x%016x, 0x%016x,  0x%016x, 0x%016x\n", pA_sub, pB_sub,  pM_sub, M_sub);

              //              printf(" %d, %d,\n", *(pA_sub+MATRIX_SUB_Y*k+n), *(pB_sub+MATRIX_SUB_Y*n+m));

                            int *pTmp1 = pA_sub+MATRIX_SUB_Y*k+n;

                            int *pTmp2 = pB_sub+MATRIX_SUB_Y*n+m;

                            int *pTmp3 = pM_sub+MATRIX_SUB_Y*k+m;

                            *(pTmp3) += *pTmp1*(*pTmp2);

#endif

                        }

                    }

                }

            }

        }

    }

    for(int i = 0; i < MATRIX_SUBSET_X; i++)

    {

        for(int j = 0; j < MATRIX_SUBSET_Y; j++)

        {

#if USING_STACK

#else

            int *pC_sub = C_sub + (MATRIX_SUB_X*MATRIX_SUB_Y)*(i*MATRIX_SUBSET_Y+j);

#endif

           // int *pCsub = C_sub[i][j];

            for(int k = 0; k < MATRIX_SUB_X; k++)

            {

#if USING_STACK

#else

#endif

                for(int m = 0; m < MATRIX_SUB_Y; m++)

                {

                   // int *pMsub = M_sub[i][m][j];

                    for(int t = 0; t < MATRIX_SUBSET_Y; t++)

                    {

#if USING_STACK

                        C_sub[i][j][k][m] += M_sub[i][m][j][k][m];

#else

                        int *pM_sub = M_sub + (MATRIX_SUB_X*MATRIX_SUB_Y)*(i*MATRIX_SUBSET_Y * MATRIX_SUBSET_Y + j*MATRIX_SUBSET_Y+t);

                        pM_sub += MATRIX_SUB_Y*k+m;

                        *(pC_sub+MATRIX_SUB_Y*k+m) += *pM_sub;

#endif

                    }

                }

            }

        }

    }

 

    gettimeofday(&tv_end, 0);

    printf("%s(%d):  cost %ldms\n", __FILE__, __LINE__,  (tv_end.tv_sec * 1000000 + tv_end.tv_usec - tv_start.tv_sec * 1000000 - tv_start.tv_usec)/1000);

 

    return 0;

}

               

 

测试结果如下:

root@opzoon150:/data_4/songqing/fortest# ./a.out

0x00000000653e0010, 0x0000000064fdf010, 0x0000000064bde010, 0x0000000044bdd010

test.cpp(77):  cost 11946ms

test.cpp(180):  cost 6971ms

也就是说,当两个比较大的矩阵相乘时,采用分块相乘的计算方式还是有价值的。

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值