目录
一、要求
(一)内容与设计思想
1.
编程实现普通的矩阵乘法;
2.
编程实现
Strassen’s algorithm
;
3.
在不同数据规模情况下(数据规模N=2^3,2^5,2^7,2^9 )下,比较两种算法的运行时间各是多少;
4.
修改
Strassen’s algorithm
,使之适应矩阵规模
N
不是
2
的幂的情况;
5.
改进后的算法与
2
中的算法在相同数据规模下进行比较。
为便于同学操作,我们设置了课程编程题在
oj |
《算法设计与分析》第二次实验课
,同学可以提交算法
代码到这个平台去计算和比较时间。
注意
1
:
每组样例有对应的执行次数,如果需要得到单次时间,要除以重复数。
注意
2
:
由于平台的缘故,可能会有
30ms
的波动;解决办法是多提交几次(比如
5
次)取平均数。
(二)实验过程
分别画出各个实验结果的折线图
(三)总结
对上机实践结果进行分析,问题回答,上机的心得体会及改进意见。
二、代码
起初,我是把普通乘法和Strassen算法完全分开的,后来在CSDN上查找资料时偶然地看到了一位大佬的代码,他是把矩阵乘法分开成加法和乘法,相对我的代码要简洁好多……于是我又根据这种思路改进了我的代码(或许称不上改进,基本推翻重来了)
数据的第一行有两个整数 N(1<=N<=163841≤N≤16384) 和 M (8<=M<=512)。表示以下有 N 对矩阵(必定是成对输入的),并且每个矩阵的大小都是M×M。
(一)原来的代码
其中关同步,提高cin / cout效率
ios::sync_with_stdio(false); //关同步,提高cin / cout效率
cin.tie(nullptr);
因为数据实在太大了,不写它可能就运行不起来
1.矩阵加法.cpp
#include<iostream>
using namespace std;
void Matrix_Sum(int n, int** MatrixA, int** MatrixB, int** MatrixSum) {
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
MatrixSum[i][j] = MatrixA[i][j] + MatrixB[i][j];
}
2.矩阵减法.cpp
#include<iostream>
using namespace std;
void Matrix_Sub(int n, int** MatrixA, int** MatrixB, int** MatrixSub) {
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++)
MatrixSub[i][j] = MatrixA[i][j] - MatrixB[i][j];
}
3.普通乘法.cpp
#include<iostream>
using namespace std;
void Matrix_Mul(int n, int** MatrixA, int** MatrixB, int** MatrixMul) {
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++) {
MatrixMul[i][j] = 0;
for (int k = 0; k < n; k++)
MatrixMul[i][j] = MatrixMul[i][j] + MatrixA[i][k] * MatrixB[k][j];
}
}
4.Strassen.cpp
#include<iostream>
#include "矩阵加法.cpp"
#include "矩阵减法.cpp"
#include "普通乘法.cpp"
using namespace std;
void Strassen(int N, int** MatrixA, int** MatrixB, int** MatrixC ) {
int n = N / 2; //分治思想
//初始化每个小矩阵的大小
int** MatrixA11 = new int* [n];
int** MatrixA12 = new int* [n];
int** MatrixA21 = new int* [n];
int** MatrixA22 = new int* [n];
int** MatrixB11 = new int* [n];
int** MatrixB12 = new int* [n];
int** MatrixB21 = new int* [n];
int** MatrixB22 = new int* [n];
int** MatrixC11 = new int* [n];
int** MatrixC12 = new int* [n];
int** MatrixC21 = new int* [n];
int** MatrixC22 = new int* [n];
for (int i = 0; i < n ; i++) { //分配连续内存
MatrixA11[i] = new int[n];
MatrixA12[i] = new int[n];
MatrixA21[i] = new int[n];
MatrixA22[i] = new int[n];
MatrixB11[i] = new int[n];
MatrixB12[i] = new int[n];
MatrixB21[i] = new int[n];
MatrixB22[i] = new int[n];
MatrixC11[i] = new int[n];
MatrixC12[i] = new int[n];
MatrixC21[i] = new int[n];
MatrixC22[i] = new int[n];
}
//为每个小矩阵赋值,将大矩阵分割为4个小矩阵
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++) {
MatrixA11[i][j] = MatrixA[i][j];
MatrixA12[i][j] = MatrixA[i][j + n];
MatrixA21[i][j] = MatrixA[i + n][j];
MatrixA22[i][j] = MatrixA[i + n][j + n];
MatrixB11[i][j] = MatrixB[i][j];
MatrixB12[i][j] = MatrixB[i][j + n];
MatrixB21[i][j] = MatrixB[i + n][j];
MatrixB22[i][j] = MatrixB[i + n][j + n];
}
//存放加减法结果
int** S1 = new int* [n];
int** S2 = new int* [n];
int** S3 = new int* [n];
int** S4 = new int* [n];
int** S5 = new int* [n];
int** S6 = new int* [n];
int** S7 = new int* [n];
int** S8 = new int* [n];
int** S9 = new int* [n];
int** S10 = new int* [n];
for (int i = 0; i < n; i++) { //分配连续内存
S1[i] = new int[n];
S2[i] = new int[n];
S3[i] = new int[n];
S4[i] = new int[n];
S5[i] = new int[n];
S6[i] = new int[n];
S7[i] = new int[n];
S8[i] = new int[n];
S9[i] = new int[n];
S10[i] = new int[n];
}
//计算
Matrix_Sub(n, MatrixA12, MatrixA22, S1);
Matrix_Sum(n, MatrixB21, MatrixB22, S2);
Matrix_Sum(n, MatrixA11, MatrixA22, S3);
Matrix_Sum(n, MatrixB11, MatrixB22, S4);
Matrix_Sub(n, MatrixA11, MatrixA21, S5);
Matrix_Sum(n, MatrixB11, MatrixB12, S6);
Matrix_Sum(n, MatrixA11, MatrixA12, S7);
Matrix_Sub(n, MatrixB12, MatrixB22, S8);
Matrix_Sub(n, MatrixB21, MatrixB11, S9);
Matrix_Sum(n, MatrixA21, MatrixA22, S10);
//存放乘法结果
int** M1 = new int* [n];
int** M2 = new int* [n];
int** M3 = new int* [n];
int** M4 = new int* [n];
int** M5 = new int* [n];
int** M6 = new int* [n];
int** M7 = new int* [n];
for (int i = 0; i < n; i++) { //分配连续内存
M1[i] = new int[n];
M2[i] = new int[n];
M3[i] = new int[n];
M4[i] = new int[n];
M5[i] = new int[n];
M6[i] = new int[n];
M7[i] = new int[n];
}
Matrix_Mul(n, S1, S2, M1);
Matrix_Mul(n, S3, S4, M2);
Matrix_Mul(n, S5, S6, M3);
Matrix_Mul(n, S7, MatrixB22, M4);
Matrix_Mul(n, MatrixA11, S8, M5);
Matrix_Mul(n, MatrixA22, S9, M6);
Matrix_Mul(n, S10, MatrixB11, M7);
//finally
//计算C
Matrix_Sum(n, M1, M2, MatrixC11);
Matrix_Sub(n, MatrixC11, M4, MatrixC11);
Matrix_Sum(n, MatrixC11, M6, MatrixC11);
Matrix_Sum(n, M4, M5, MatrixC12);
Matrix_Sum(n, M6, M7, MatrixC21);
Matrix_Sub(n, M2, M3, MatrixC22);
Matrix_Sum(n, MatrixC22, M5, MatrixC22);
Matrix_Sub(n, MatrixC22, M7, MatrixC22);
//将C合并
for (int i = 0; i < n; i++)
for (int j = 0; j < n; j++) {
MatrixC[i][j] = MatrixC11[i][j];
MatrixC[i][j + n] = MatrixC12[i][j];
MatrixC[i + n][j] = MatrixC21[i][j];
MatrixC[i + n][j + n] = MatrixC22[i][j];
}
}
5.main.cpp
#include<iostream>
#include<fstream>
#include"Strassen.cpp"
#include<time.h>
using namespace std;
ifstream fi("6.in", ios::in);
int main() {
clock_t startTime_For_Normal_Multipilication;
clock_t endTime_For_Normal_Multipilication;
clock_t startTime_For_Strassen;
clock_t endTime_For_Strassen;
time_t start, end;
int N;
fi >> N;
int n=N;
int M; //矩阵大小
fi >> M;
int** MatrixA = new int* [M];
int** MatrixB = new int* [M];
int** MatrixC = new int* [M];
int** MatrixT = new int* [M]; //用于检测结果是否正确
for (int i = 0; i < M; i++) {
MatrixA[i] = new int[M];
MatrixB[i] = new int[M];
MatrixC[i] = new int[M];
MatrixT[i] = new int[M];
}
int i,j;
int time1=0;
int time2=0;
while(N--){
for(i=0;i<M;i++){
for(j=0;j<M;j++){
fi >> MatrixA[i][j];
}
}
for(i=0;i<M;i++){
for(j=0;j<M;j++){
fi >> MatrixB[i][j];
}
}
//Mul
startTime_For_Normal_Multipilication = clock();
Matrix_Mul(M, MatrixA, MatrixB, MatrixT);
endTime_For_Normal_Multipilication = clock();
time1+=endTime_For_Normal_Multipilication - startTime_For_Normal_Multipilication;
//cout << "print:" << endl;
//print(MatrixT, M);
//Strassen
startTime_For_Strassen = clock();
Strassen(M, MatrixA, MatrixB, MatrixC);
endTime_For_Strassen = clock();
time2+=endTime_For_Strassen - startTime_For_Strassen;
//cout << "print:" << endl;
//print(MatrixC, M);
}
cout << "Mul:" << time1 << endl;
cout << "Strassen:" << time2;
return 0;
}
(二)Strassen算法改进
矩阵规模
不是
2
的幂
当n为奇数时,先给矩阵加一行一列0,使之成为一个偶数矩阵。
#include <iostream>
using namespace std;
void Strassen(int N, int **MatrixA, int **MatrixB, int **MatrixC){
int halfsize=N/2;
if (N<=16)
normal(MatrixA,MatrixB,MatrixC,N);
else{
int** A11;
int** A12;
int** A21;
int** A22;
int** B11;
int** B12;
int** B21;
int** B22;
int** C11;
int** C12;
int** C21;
int** C22;
int** M1;
int** M2;
int** M3;
int** M4;
int** M5;
int** M6;
int** M7;
int** a_re;
int** b_re;
A11=new int *[halfsize];
A12=new int *[halfsize];
A21=new int *[halfsize];
A22=new int *[halfsize];
B11=new int *[halfsize];
B12=new int *[halfsize];
B21=new int *[halfsize];
B22=new int *[halfsize];
C11=new int *[halfsize];
C12=new int *[halfsize];
C21=new int *[halfsize];
C22=new int *[halfsize];
M1=new int *[halfsize];
M2=new int *[halfsize];
M3=new int *[halfsize];
M4=new int *[halfsize];
M5=new int *[halfsize];
M6=new int *[halfsize];
M7=new int *[halfsize];
a_re=new int *[halfsize];
b_re=new int *[halfsize];
A11=new int *[halfsize];
A12=new int *[halfsize];
A21=new int *[halfsize];
A22=new int *[halfsize];
B11=new int *[halfsize];
B12=new int *[halfsize];
B21=new int *[halfsize];
B22=new int *[halfsize];
C11=new int *[halfsize];
C12=new int *[halfsize];
C21=new int *[halfsize];
C22=new int *[halfsize];
M1=new int *[halfsize];
M2=new int *[halfsize];
M3=new int *[halfsize];
M4=new int *[halfsize];
M5=new int *[halfsize];
M6=new int *[halfsize];
M7=new int *[halfsize];
a_re=new int *[halfsize];
b_re=new int *[halfsize];
for(int i=0;i<halfsize;i++){
A11[i]=new int[halfsize];
A12[i]=new int[halfsize];
A21[i]=new int[halfsize];
A22[i]=new int[halfsize];
B11[i]=new int[halfsize];
B12[i]=new int[halfsize];
B21[i]=new int[halfsize];
B22[i]=new int[halfsize];
C11[i]=new int[halfsize];
C12[i]=new int[halfsize];
C21[i]=new int[halfsize];
C22[i]=new int[halfsize];
M1[i]=new int[halfsize];
M2[i]=new int[halfsize];
M3[i]=new int[halfsize];
M4[i]=new int[halfsize];
M5[i]=new int[halfsize];
M6[i]=new int[halfsize];
M7[i]=new int[halfsize];
a_re[i]=new int[halfsize];
b_re[i]=new int[halfsize];
}
for (int i=0;i<N/2;i++){//devide
for (int j=0;j<N/2;j++){
A11[i][j]=MatrixA[i][j];
A12[i][j]=MatrixA[i][j+N/2];
A21[i][j]=MatrixA[i+N/2][j];
A22[i][j]=MatrixA[i+N/2][j+N/2];
B11[i][j]=MatrixB[i][j];
B12[i][j]=MatrixB[i][j+N/2];
B21[i][j]=MatrixB[i+N/2][j];
B22[i][j]=MatrixB[i+N/2][j+N/2];
}
}
Add(A11,A22,a_re,halfsize);
Add(B11,B22,b_re,halfsize);
Strassen(halfsize,a_re,b_re,M1);
Add(A21,A22,a_re,halfsize);
Strassen(halfsize,a_re,B11,M2);
Sub(B12,B22,b_re,halfsize);
Strassen(halfsize,A11,b_re,M3);
Sub(B21,B11,b_re,halfsize);
Strassen(halfsize,A22,b_re,M4);
Add(A11,A12,a_re,halfsize);
Strassen(halfsize,a_re,B22,M5);
Sub(A21,A11,a_re,halfsize);
Add(B11,B12,b_re,halfsize);
Strassen(halfsize,a_re,b_re,M6);
Sub(A12,A22,a_re,halfsize);
Add(B21,B22,b_re,halfsize);
Strassen(halfsize,a_re,b_re,M7);
Add(M1,M4,a_re,halfsize);
Sub(M7,M5,b_re,halfsize);
Add(a_re,b_re,C11,halfsize);
Add(M3,M5,C12,halfsize);
Add(M2,M4,C21,halfsize);
Add(M1,M3,a_re,halfsize);
Sub(M6,M2,b_re,halfsize);
Add(a_re,b_re,C22,halfsize);
for(int i=0;i<N/2;i++){
for(int j=0;j<N/2;j++){
MatrixC[i][j]=C11[i][j];
MatrixC[i][j+N/2]=C12[i][j];
MatrixC[i+N/2][j]=C21[i][j];
MatrixC[i+N/2][j+N/2]=C22[i][j];
}
}
for (int i=0;i<halfsize;i++){
delete[] A11[i];delete[] A12[i];delete[] A21[i];
delete[] A22[i];
delete[] B11[i];delete[] B12[i];delete[] B21[i];
delete[] B22[i];
delete[] C11[i];delete[] C12[i];delete[] C21[i];
delete[] C22[i];
delete[] M1[i];delete[] M2[i];delete[] M3[i];delete[] M4[i];
delete[] M5[i];delete[] M6[i];delete[] M7[i];
delete[] a_re[i];delete[] b_re[i] ;
}
delete[] A11;delete[] A12;delete[] A21;delete[] A22;
delete[] B11;delete[] B12;delete[] B21;delete[] B22;
delete[] C11;delete[] C12;delete[] C21;delete[] C22;
delete[] M1;delete[] M2;delete[] M3;delete[] M4;delete[] M5;
delete[] M6;delete[] M7;
delete[] a_re;
delete[] b_re ;
}
}
void Add(int** MatrixA,int** MatrixB,int** Matrix_re,int Size){
for(int i=0;i<Size;i++){
for(int j=0;j<Size;j++){
Matrix_re[i][j]=MatrixA[i][j]+MatrixB[i][j];
}
}
}
void Sub(int** MatrixA,int** MatrixB,int** Matrix_re,int Size){
for(int i=0;i<Size;i++){
for(int j=0;j<Size;j++){
Matrix_re[i][j]=MatrixA[i][j]-MatrixB[i][j];
}
}
}
void normal(int** MatrixA,int** MatrixB,int** Matrix_re,int Size){
for(int i=0;i<Size;i++)
for(int j=0;j<Size;j++){
Matrix_re[i][j]=0;
for(int k=0;k<Size;k++)
Matrix_re[i][j]=Matrix_re[i][j]+MatrixA[i][k]*MatrixB[k][j];
}
}
int cinn(int** MatrixA,int** MatrixB,int length,int hou_length){
for(int row=0;row<hou_length;row++)
for(int column=0;column<hou_length;column++){
MatrixA[row][column]=MatrixB[row][column]=0;
}
for(int row=0;row<length;row++)
for(int column=0;column<length;column++)
cin>>MatrixA[row][column];
for(int row=0;row<length;row++)
for(int column=0;column<length;column++)
cin>>MatrixB[row][column];
return 0;
}
void Printm(int **MatrixA,int Size){
for(int row=0;row<Size;row++)
for(int column=0;column<Size;column++){
cout<<MatrixA[row][column];
if (column+1==Size)
cout<<endl;
else
cout<<" ";
}
}
int main(){
ios::sync_with_stdio(false); //关同步,提高cin / cout效率
cin.tie(nullptr);
int Size,time;
int** MatrixA;
int** MatrixB;
int** MatrixC;
cin>>time>>Size;
int N=1;
while(Size>N)
N*=2;
while(time){
MatrixA=new int *[N];
MatrixB=new int *[N];
MatrixC=new int *[N];
for (int i=0;i<N;i++){
MatrixA[i]=new int [N];
MatrixB[i]=new int [N];
MatrixC[i]=new int [N];
}
cinn(MatrixA,MatrixB,Size,N); //得到随机矩阵
Strassen(N,MatrixA,MatrixB,MatrixC);
Printm(MatrixC,Size);
time--;
}
return 0;
}
三、折线图
(一)普通乘法和Strassen算法(ms)
(二)数据规模为2的幂与不是2的幂(ms)
然而k=300时运行时间在一百左右
四、总结
朴素矩阵的时间复杂度是O(n^3),而Strassen算法的时间复杂度是O(n^2.807),维度比较大的矩阵使用Strassen算法时时间可能更短,乘法运算相对朴素矩阵有所减少
在Strassen算法中,由于涉及到大量读写操作,这些读写操作都需要大量循环,当读写次数超出使用Strassen乘法的收益时,更适宜用朴素矩阵