Matrix multiplication
Time Limit: 4000/2000 MS (Java/Others) Memory Limit: 131072/131072 K (Java/Others)
Problem Description
Given two matrices A and B of size n×n, find the product of them.
bobo hates big integers. So you are only asked to find the result modulo 3.
bobo hates big integers. So you are only asked to find the result modulo 3.
Input
The input consists of several tests. For each tests:
The first line contains n (1≤n≤800). Each of the following n lines contain n integers -- the description of the matrix A. The j-th integer in the i-th line equals A ij. The next n lines describe the matrix B in similar format (0≤A ij,B ij≤10 9).
The first line contains n (1≤n≤800). Each of the following n lines contain n integers -- the description of the matrix A. The j-th integer in the i-th line equals A ij. The next n lines describe the matrix B in similar format (0≤A ij,B ij≤10 9).
Output
For each tests:
Print n lines. Each of them contain n integers -- the matrix A×B in similar format.
Print n lines. Each of them contain n integers -- the matrix A×B in similar format.
Sample Input
1 0 1 2 0 1 2 3 4 5 6 7
Sample Output
0 0 1 2 1
题意:给出两个n*n的矩阵,求这两个矩阵的乘积,结果对3取余。
分析:拿到题先用了经典的矩阵相乘的方法,提交以后果断超时了。后来在网上搜了一下矩阵相乘优化,找到了一个优化方法,只可惜现在我还没有理解是怎么优化的。
![哭](http://static.blog.csdn.net/xheditor/xheditor_emot/default/cry.gif)
#include<cstdio>
#include<cstring>
#include<algorithm>
using namespace std;
const int N = 805;
int a[N][N], b[N][N], ans[N][N];
void Multi(int n)
{
int i, j, k, L, *p2;
int tmp[N], con;
for(i = 0; i < n; ++i)
{
memset(tmp, 0, sizeof(tmp));
for(k = 0, L = (n & ~15); k < L; ++k)
{
con = a[i][k];
for(j = 0, p2 = b[k]; j < n; ++j, ++p2)
tmp[j] += con * (*p2);
if((k & 15) == 15)
{
for(j = 0; j < n; ++j) tmp[j] %= 3;
}
}
for( ; k < n; ++k)
{
con = a[i][k];
for(j = 0, p2 = b[k]; j < n; ++j, ++p2)
tmp[j] += con * (*p2);
}
for(j = 0; j < n; ++j)
ans[i][j] = tmp[j] % 3;
}
}
int main()
{
int n, i, j, k;
while(~scanf("%d",&n))
{
for(i = 0; i < n; i++)
for(j = 0; j < n; j++)
{
scanf("%d",&a[i][j]);
a[i][j] %= 3;
}
for(i = 0; i < n; i++)
for(j = 0; j < n; j++)
{
scanf("%d",&b[i][j]);
b[i][j] %= 3;
}
Multi(n);
for(i = 0; i < n; i++)
{
for(j = 0; j < n-1; j++)
printf("%d ", ans[i][j]);
printf("%d\n", ans[i][n-1]);
}
}
return 0;
}
http://blog.csdn.net/gogdizzy/article/details/9003369这里面讲解了矩阵相乘的优化方法。
下面这种方法也可以过:
#include<cstdio>
#include<cstring>
#include<algorithm>
#include<cmath>
using namespace std;
const int N = 805;
int a[N][N], b[N][N], ans[N][N];
int main()
{
int n, i, j, k;
while(~scanf("%d",&n))
{
for(i = 1; i <= n; i++)
for(j = 1; j <= n; j++)
{
scanf("%d",&a[i][j]);
a[i][j] %= 3;
}
for(i = 1; i <= n; i++)
for(j = 1; j <= n; j++)
{
scanf("%d",&b[i][j]);
b[i][j] %= 3;
}
memset(ans, 0, sizeof(ans));
for(k = 1; k <= n; k++) //经典算法中这层循环在最内层,放最内层会超时,但是放在最外层或者中间都不会超时,不知道为什么
for(i = 1; i <= n; i++)
for(j = 1; j <= n; j++)
{
ans[i][j] += a[i][k] * b[k][j];
//ans[i][j] %= 3; //如果在这里对3取余,就超时了
}
for(i = 1; i <= n; i++)
{
for(j = 1; j < n; j++)
printf("%d ", ans[i][j] % 3);
printf("%d\n", ans[i][n] % 3);
}
}
return 0;
}