CSAPP_ch6 矩阵乘法的优化

尝试了分块不能恰好分匀的情况.

C[][] = A[][] * B[][] 。

C的结构如下图

#include<stdio.h>

#define M 5
#define N 6
#define L 7
#define BLOCK 2 // the size of a BLOCK
/*
+---------+---------+---+    +-----------------
|         |         |   |    |
|   B*B   |  B*B    | I |    |
|         |         |   |    |
+---------+---------+---+     
|         |         |   |
|  B*B    |   B*B   | I |
|         |         |   |
+---------+---------+---+
|   III   |    III  |II |
+---------+---------+---+
       Matrix c
*/ 
int A[M][N];
int BB[N][L];
int C[M][L];
int D[M][L];
void Initialze();
void Mutiplycation_of_Matrix(int A[M][N], int B[N][L], int C[M][L]);
void Mutiplycation_of_Matrix_simple(int A[M][N], int B[N][L], int C[M][L]);
void print();

int main(){
    Initialze();
    
    Mutiplycation_of_Matrix_simple(A, BB, C);
    Mutiplycation_of_Matrix(A, BB, D);
    
    print();
    
    return 0;
}
void print(){
    int i,j, k;
    printf("BLOCK size is %d\n ", BLOCK);
    printf("------The left is original version, \t the right is optimized one------\n");
    for(i=0; i<M; i++){
        for(j=0; j< L ;j++){
            printf("%6d", C[i][j]);
            if(C[i][j]!= D[i][j]){
                goto done;
            }
        }
        printf(" | ");
        for(j=0; j< L ;j++){
            printf("%6d", D[i][j]);
        }
        printf("\n");
    }
    printf("Result is right!\n");
    return;
    done:
        printf("The result of optimized is wrong!\n");

}
void Initialze(){
    int i, j;
    printf("---------------- Matrix A is below:  ----------\n");
    for(i=0; i< M; i++){
        printf("%6d:",i);
        for(j=0; j<N; j++){
            A[i][j] = i*100+j;
            printf("%6d", i*100+j);
        }
        printf("\n");
    }

    printf("\n\n---------------- Matrix B is below:  ----------\n");
    for(i=0; i< N; i++){
        printf("%6d:",i);
        for(j=0; j<L; j++){
            BB[i][j] = i*11-2*j;
            printf("%6d", i*11-2*j);
        }
        printf("\n");
    }
    printf("\n\n");
}
void Mutiplycation_of_Matrix_simple(int A[M][N], int B[N][L], int C[M][L]){
     //memset(C, 0, M*L*sizeof(int));
    int i,j,k;
    for(i=0; i<M; i++){
        for(j=0; j<L; j++){
            C[i][j] = 0;
        }
    }

    for(i=0; i<M; i++){
        for(j=0; j<L; j++){
            for(k=0; k < N ; k++){
                C[i][j] += A[i][k] * B[k][j];
            }
        }
    }
}
void Mutiplycation_of_Matrix(int A[M][N], int B[N][L], int C[M][L]){
    int i, j, k, l, m, p_k, acc;

    //memset(C, 0, M*L*sizeof(int));
    for(i=0; i<M; i++){
        for(j=0; j<L; j++){
            C[i][j] = 0;
        }
    }

    for(i=0; i <= M - BLOCK ; i+=BLOCK)
        for(j=0; j<= L-BLOCK; j+= BLOCK){
            
            for(k=0; k<= N-BLOCK; k+= BLOCK){
            
                for(l=i; l< i+BLOCK; l++){
                    for(m = j; m< j+BLOCK; m++){
                        acc = 0; // patial variable to avoid repately read_write the memory
                        for(p_k = k; p_k < k+BLOCK; p_k++){
                            acc += A[l][p_k] * B[p_k][m];   // can be optimized
                        }
                        C[l][m] += acc;
                    }
                }
            }
            // A[][]列的边角料的处理,横行一刷子刷过来 denote: 确保B*B内计算的值正确
            for(l=i; l< i+BLOCK; l++){
                for(m = j; m< j+BLOCK; m++){
                    acc = 0;// patial variable to avoid repately read_write the memory
                    for(p_k=k; p_k < N; p_k++){
                        acc += A[l][p_k] * B[p_k][m];   // can be optimized
                    }
                    C[l][m] += acc;
                }
            }
        }
    // caculate the I and II part of the Matrix C
    int tmp_i = i, tmp_j=j; 
    for(i=0; i<M; i++)
        for(j = tmp_j; j<L ;j++){        // forget the bracket!!!
            acc = 0;
            for(k=0; k<N; k++){
                acc += A[i][k] * B[k][j];
            }    
            C[i][j] += acc;
        }

    // caculate the III part of the Matrix C
    for(i=tmp_i; i<M; i++)
        for(j = 0; j< tmp_j ;j++){        // forget the bracket!!!
            acc = 0;
            for(k=0; k<N; k++){
                acc += A[i][k] * B[k][j];
            }    
            C[i][j] += acc;
        }    
    //另起炉灶 --- 处理A底部的行,B[][]右边的列
    /*      only caculate the part I of the Matic C
    for(; i<M; i++)
        for(; j<L ;j++){        // forget the bracket!!!
            acc = 0;
            for(k=0; k<N; k++){
                acc += A[i][k] * B[k][j];
            }    
            C[i][j] += acc;
        }
    */
}

 

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值