尝试了分块不能恰好分匀的情况.
C[][] = A[][] * B[][] 。
C的结构如下图
#include<stdio.h>
#define M 5
#define N 6
#define L 7
#define BLOCK 2 // the size of a BLOCK
/*
+---------+---------+---+ +-----------------
| | | | |
| B*B | B*B | I | |
| | | | |
+---------+---------+---+
| | | |
| B*B | B*B | I |
| | | |
+---------+---------+---+
| III | III |II |
+---------+---------+---+
Matrix c
*/
int A[M][N];
int BB[N][L];
int C[M][L];
int D[M][L];
void Initialze();
void Mutiplycation_of_Matrix(int A[M][N], int B[N][L], int C[M][L]);
void Mutiplycation_of_Matrix_simple(int A[M][N], int B[N][L], int C[M][L]);
void print();
int main(){
Initialze();
Mutiplycation_of_Matrix_simple(A, BB, C);
Mutiplycation_of_Matrix(A, BB, D);
print();
return 0;
}
void print(){
int i,j, k;
printf("BLOCK size is %d\n ", BLOCK);
printf("------The left is original version, \t the right is optimized one------\n");
for(i=0; i<M; i++){
for(j=0; j< L ;j++){
printf("%6d", C[i][j]);
if(C[i][j]!= D[i][j]){
goto done;
}
}
printf(" | ");
for(j=0; j< L ;j++){
printf("%6d", D[i][j]);
}
printf("\n");
}
printf("Result is right!\n");
return;
done:
printf("The result of optimized is wrong!\n");
}
void Initialze(){
int i, j;
printf("---------------- Matrix A is below: ----------\n");
for(i=0; i< M; i++){
printf("%6d:",i);
for(j=0; j<N; j++){
A[i][j] = i*100+j;
printf("%6d", i*100+j);
}
printf("\n");
}
printf("\n\n---------------- Matrix B is below: ----------\n");
for(i=0; i< N; i++){
printf("%6d:",i);
for(j=0; j<L; j++){
BB[i][j] = i*11-2*j;
printf("%6d", i*11-2*j);
}
printf("\n");
}
printf("\n\n");
}
void Mutiplycation_of_Matrix_simple(int A[M][N], int B[N][L], int C[M][L]){
//memset(C, 0, M*L*sizeof(int));
int i,j,k;
for(i=0; i<M; i++){
for(j=0; j<L; j++){
C[i][j] = 0;
}
}
for(i=0; i<M; i++){
for(j=0; j<L; j++){
for(k=0; k < N ; k++){
C[i][j] += A[i][k] * B[k][j];
}
}
}
}
void Mutiplycation_of_Matrix(int A[M][N], int B[N][L], int C[M][L]){
int i, j, k, l, m, p_k, acc;
//memset(C, 0, M*L*sizeof(int));
for(i=0; i<M; i++){
for(j=0; j<L; j++){
C[i][j] = 0;
}
}
for(i=0; i <= M - BLOCK ; i+=BLOCK)
for(j=0; j<= L-BLOCK; j+= BLOCK){
for(k=0; k<= N-BLOCK; k+= BLOCK){
for(l=i; l< i+BLOCK; l++){
for(m = j; m< j+BLOCK; m++){
acc = 0; // patial variable to avoid repately read_write the memory
for(p_k = k; p_k < k+BLOCK; p_k++){
acc += A[l][p_k] * B[p_k][m]; // can be optimized
}
C[l][m] += acc;
}
}
}
// A[][]列的边角料的处理,横行一刷子刷过来 denote: 确保B*B内计算的值正确
for(l=i; l< i+BLOCK; l++){
for(m = j; m< j+BLOCK; m++){
acc = 0;// patial variable to avoid repately read_write the memory
for(p_k=k; p_k < N; p_k++){
acc += A[l][p_k] * B[p_k][m]; // can be optimized
}
C[l][m] += acc;
}
}
}
// caculate the I and II part of the Matrix C
int tmp_i = i, tmp_j=j;
for(i=0; i<M; i++)
for(j = tmp_j; j<L ;j++){ // forget the bracket!!!
acc = 0;
for(k=0; k<N; k++){
acc += A[i][k] * B[k][j];
}
C[i][j] += acc;
}
// caculate the III part of the Matrix C
for(i=tmp_i; i<M; i++)
for(j = 0; j< tmp_j ;j++){ // forget the bracket!!!
acc = 0;
for(k=0; k<N; k++){
acc += A[i][k] * B[k][j];
}
C[i][j] += acc;
}
//另起炉灶 --- 处理A底部的行,B[][]右边的列
/* only caculate the part I of the Matic C
for(; i<M; i++)
for(; j<L ;j++){ // forget the bracket!!!
acc = 0;
for(k=0; k<N; k++){
acc += A[i][k] * B[k][j];
}
C[i][j] += acc;
}
*/
}