矩阵乘法
#include<iostream>
#include<ctime>
using namespace std;
// Baseline benchmark: times the plain triple-loop product c = a * b of two
// n-by-n matrices stored row-major in flat arrays, then prints the elapsed
// time in milliseconds on its own line.
int main() {
    const int n = 1000;
    double* a = new double[n * n];
    double* b = new double[n * n];
    double* c = new double[n * n];

    // Both operands receive the same content: element (i, j) = i + j.
    for (int i = 0; i < n; ++i)
        for (int j = 0; j < n; ++j) {
            a[i * n + j] = i + j;
            b[i * n + j] = i + j;
        }

    const std::clock_t tic = std::clock();
    for (int i = 0; i < n; ++i)
        for (int j = 0; j < n; ++j) {
            double s = 0.0;
            // Row i of a times column j of b; b is read with a stride of n
            // elements between consecutive k.
            for (int k = 0; k < n; ++k)
                s += a[i * n + k] * b[k * n + j];
            c[i * n + j] = s;
        }
    const std::clock_t toc = std::clock() - tic;

    // std::clock counts ticks; scale by CLOCKS_PER_SEC so the printed number
    // is milliseconds whatever the platform's tick rate is.
    std::cout << static_cast<long long>(toc) * 1000 / CLOCKS_PER_SEC << std::endl;

    // Read one result through a volatile so the product has an observable use.
    volatile double sink = c[n * n - 1];
    (void)sink;

    delete[] a;
    delete[] b;
    delete[] c;
    return 0;
}
耗时 1600ms
将后一个矩阵 b 转置存放(内层循环按行连续访问 b)
#include<iostream>
#include<ctime>
using namespace std;
// Transposed-operand benchmark: times c[i][j] = sum over k of
// a[i][k] * b[j][k] for n-by-n row-major matrices, then prints the elapsed
// time in milliseconds on its own line.
int main() {
    const int n = 1000;
    double* a = new double[n * n];
    double* b = new double[n * n];
    double* c = new double[n * n];

    // Element (i, j) = i + j for both operands. This content is symmetric, so
    // using b's rows in place of its columns below gives the same values as
    // the ordinary product a * b.
    for (int i = 0; i < n; ++i)
        for (int j = 0; j < n; ++j) {
            a[i * n + j] = i + j;
            b[i * n + j] = i + j;
        }

    const std::clock_t tic = std::clock();
    for (int i = 0; i < n; ++i)
        for (int j = 0; j < n; ++j) {
            double s = 0.0;
            // Both a and b are read at consecutive addresses as k advances.
            for (int k = 0; k < n; ++k)
                s += a[i * n + k] * b[j * n + k];
            c[i * n + j] = s;
        }
    const std::clock_t toc = std::clock() - tic;

    // std::clock counts ticks; scale by CLOCKS_PER_SEC so the printed number
    // is milliseconds whatever the platform's tick rate is.
    std::cout << static_cast<long long>(toc) * 1000 / CLOCKS_PER_SEC << std::endl;

    // Read one result through a volatile so the product has an observable use.
    volatile double sink = c[n * n - 1];
    (void)sink;

    delete[] a;
    delete[] b;
    delete[] c;
    return 0;
}
耗时 960ms
循环展开2个乘积
#include<iostream>
#include<ctime>
using namespace std;
// Unroll-by-2 benchmark: same row-by-row dot product as the transposed
// version (a[i][k] * b[j][k]), but each inner-loop iteration adds two
// products. Prints the elapsed time in milliseconds on its own line.
int main() {
    const int n = 1000;
    double* a = new double[n * n];
    double* b = new double[n * n];
    double* c = new double[n * n];

    // Element (i, j) = i + j for both operands.
    for (int i = 0; i < n; ++i)
        for (int j = 0; j < n; ++j) {
            a[i * n + j] = i + j;
            b[i * n + j] = i + j;
        }

    // Largest multiple of 2 not exceeding n: the unrolled loop stops here so
    // that index k + 1 never runs past the end of a row.
    const int n_pairs_end = n - n % 2;

    const std::clock_t tic = std::clock();
    for (int i = 0; i < n; ++i)
        for (int j = 0; j < n; ++j) {
            double s = 0.0;
            int k = 0;
            for (; k < n_pairs_end; k += 2)
                s += a[i * n + k] * b[j * n + k] +
                     a[i * n + k + 1] * b[j * n + k + 1];
            // Leftover element when n is odd (no iterations for n = 1000).
            for (; k < n; ++k)
                s += a[i * n + k] * b[j * n + k];
            c[i * n + j] = s;
        }
    const std::clock_t toc = std::clock() - tic;

    // std::clock counts ticks; scale by CLOCKS_PER_SEC so the printed number
    // is milliseconds whatever the platform's tick rate is.
    std::cout << static_cast<long long>(toc) * 1000 / CLOCKS_PER_SEC << std::endl;

    // Read one result through a volatile so the product has an observable use.
    volatile double sink = c[n * n - 1];
    (void)sink;

    delete[] a;
    delete[] b;
    delete[] c;
    return 0;
}
耗时 560 ms
循环展开4个乘积
#include<iostream>
#include<ctime>
using namespace std;
// Unroll-by-4 benchmark: row-by-row dot product a[i][k] * b[j][k] with four
// products added per inner-loop iteration and the row offsets i * n and
// j * n hoisted out of the inner loop. Prints the elapsed time in
// milliseconds on its own line.
int main() {
    const int n = 1000;
    double* a = new double[n * n];
    double* b = new double[n * n];
    double* c = new double[n * n];

    // Element (i, j) = i + j for both operands.
    for (int i = 0; i < n; ++i)
        for (int j = 0; j < n; ++j) {
            a[i * n + j] = i + j;
            b[i * n + j] = i + j;
        }

    // Largest multiple of 4 not exceeding n: the unrolled loop stops here so
    // that index k + 3 never runs past the end of a row.
    const int n_quads_end = n - n % 4;

    const std::clock_t tic = std::clock();
    for (int i = 0; i < n; ++i) {
        const int i1 = i * n;  // start of row i in a and in c
        for (int j = 0; j < n; ++j) {
            const int j1 = j * n;  // start of row j in b
            double s = 0.0;
            int k = 0;
            for (; k < n_quads_end; k += 4)
                s += a[i1 + k] * b[j1 + k] +
                     a[i1 + k + 1] * b[j1 + k + 1] +
                     a[i1 + k + 2] * b[j1 + k + 2] +
                     a[i1 + k + 3] * b[j1 + k + 3];
            // Leftover elements when n is not a multiple of 4 (none for
            // n = 1000).
            for (; k < n; ++k)
                s += a[i1 + k] * b[j1 + k];
            c[i1 + j] = s;
        }
    }
    const std::clock_t toc = std::clock() - tic;

    // std::clock counts ticks; scale by CLOCKS_PER_SEC so the printed number
    // is milliseconds whatever the platform's tick rate is.
    std::cout << static_cast<long long>(toc) * 1000 / CLOCKS_PER_SEC << std::endl;

    // Read one result through a volatile so the product has an observable use.
    volatile double sink = c[n * n - 1];
    (void)sink;

    delete[] a;
    delete[] b;
    delete[] c;
    return 0;
}
耗时 480 ms
蛇形顺序
#include<iostream>
#include<ctime>
using namespace std;
// Snake-order benchmark: the unroll-by-4 row-by-row dot product
// a[i][k] * b[j][k], with the j loop running 0..n-1 on even rows i and
// n-1..0 on odd rows i. Prints the elapsed time in milliseconds on its own
// line.
int main() {
    const int n = 1000;
    double* a = new double[n * n];
    double* b = new double[n * n];
    double* c = new double[n * n];

    // Element (i, j) = i + j for both operands.
    for (int i = 0; i < n; ++i)
        for (int j = 0; j < n; ++j) {
            a[i * n + j] = i + j;
            b[i * n + j] = i + j;
        }

    // Largest multiple of 4 not exceeding n: the unrolled loop stops here so
    // that index k + 3 never runs past the end of a row.
    const int n_quads_end = n - n % 4;

    const std::clock_t tic = std::clock();
    for (int i = 0; i < n; ++i) {
        const int i1 = i * n;  // start of row i in a and in c
        // Even rows sweep j upward, odd rows sweep it downward, so each row
        // starts with the rows of b that the previous row finished on.
        const bool ascending = (i % 2 == 0);
        for (int step = 0; step < n; ++step) {
            const int j = ascending ? step : n - 1 - step;
            const int j1 = j * n;  // start of row j in b
            double s = 0.0;
            int k = 0;
            for (; k < n_quads_end; k += 4)
                s += a[i1 + k] * b[j1 + k] +
                     a[i1 + k + 1] * b[j1 + k + 1] +
                     a[i1 + k + 2] * b[j1 + k + 2] +
                     a[i1 + k + 3] * b[j1 + k + 3];
            // Leftover elements when n is not a multiple of 4 (none for
            // n = 1000).
            for (; k < n; ++k)
                s += a[i1 + k] * b[j1 + k];
            c[i1 + j] = s;
        }
    }
    const std::clock_t toc = std::clock() - tic;

    // std::clock counts ticks; scale by CLOCKS_PER_SEC so the printed number
    // is milliseconds whatever the platform's tick rate is.
    std::cout << static_cast<long long>(toc) * 1000 / CLOCKS_PER_SEC << std::endl;

    // Read one result through a volatile so the product has an observable use.
    volatile double sink = c[n * n - 1];
    (void)sink;

    delete[] a;
    delete[] b;
    delete[] c;
    return 0;
}
耗时 430 ms
注: Visual C++ 2017, Release, Intel(R) Core(TM) i7-8700K CPU 3.7 GHz