参考资料:https://zhuanlan.zhihu.com/p/146250334 (矩阵行列的计算次序采用了链接中给出的最优次序)
很惭愧地说,之前做矩阵相乘一直是直接调用现成的函数,从来没有自己实现过;自己不是计算机科班出身,感觉基础确实一般。所以今天试着写了个多线程矩阵乘法作为练习,看看计算效率如何,如有问题欢迎交流指正。这里矩阵用 vector<vector<int> > 表示,绝对速度上应该不如数组,这里仅用于验证多线程带来的加速效果。
#include<algorithm>
#include<chrono>
#include<cstdlib>
#include<ctime>
#include<iostream>
#include<string>
#include<thread>
#include<vector>
using namespace std;
// Operands and result of the product f (m x n) = d (m x t) * e (t x n).
// These are file-scope globals: mulSingle() and mulMulti() read d and e and
// accumulate into f, and main() fills d and e and sizes f before each run.
vector<vector<int> > d; // left operand, m rows by t columns
vector<vector<int> > e; // right operand, t rows by n columns
vector<vector<int> > f; // result, m rows by n columns; accumulated into with +=
// Single-threaded product: accumulates d * e into the global matrix f.
//
// Reads the globals d (m x t) and e (t x n) and adds every product term into
// f[i][j] with +=, so f must already be sized m x n and zero-filled for the
// result to equal d * e. The shapes are taken from d.size(), d[0].size() and
// e[0].size(); the caller is responsible for e having at least t rows and for
// all rows being rectangular.
// Returns without touching f when d or e has no rows.
void mulSingle()
{
    // d[0] and e[0] are undefined on an empty vector, so bail out first.
    if (d.empty() || e.empty())
        return;

    const std::size_t m = d.size();
    const std::size_t t = d[0].size();
    const std::size_t n = e[0].size();

    // i-k-j loop order: the innermost loop walks one row of e and one row of f
    // from left to right (the author notes this is faster than i-j-k because
    // of memory access).
    for (std::size_t i = 0; i < m; ++i)
    {
        const std::vector<int>& lhsRow = d[i];
        std::vector<int>& outRow = f[i];
        for (std::size_t k = 0; k < t; ++k)
        {
            const int s = lhsRow[k];
            const std::vector<int>& rhsRow = e[k];
            for (std::size_t j = 0; j < n; ++j)
                outRow[j] += s * rhsRow[j];
        }
    }
}
// Worker for the multi-threaded product: handles rows [rowStart, rowEnd).
//
// For each row i in the half-open range it accumulates row i of d * e into
// f[i] with +=, reading the globals d and e. Only rows of f inside the range
// are written, so calls with non-overlapping ranges do not write the same
// elements. f must already be sized and zero-filled by the caller.
//
// rowStart: first row to process (values below 0 are treated as 0).
// rowEnd:   one past the last row to process (values above the row count of
//           d are treated as the row count).
// Does nothing when d or e has no rows or when the clamped range is empty.
void mulMulti(int rowStart, int rowEnd)
{
    // d[0] and e[0] are undefined on an empty vector, so bail out first.
    if (d.empty() || e.empty())
        return;

    const int m = static_cast<int>(d.size());
    const std::size_t t = d[0].size();
    const std::size_t n = e[0].size();

    // Clamp the request so a bad range can never index past d or f.
    const int firstRow = rowStart < 0 ? 0 : rowStart;
    const int lastRow = rowEnd > m ? m : rowEnd;

    // i-k-j loop order: the innermost loop walks one row of e and one row of f
    // from left to right (the author notes this is faster than i-j-k because
    // of memory access).
    for (int i = firstRow; i < lastRow; ++i)
    {
        const std::vector<int>& lhsRow = d[i];
        std::vector<int>& outRow = f[i];
        for (std::size_t k = 0; k < t; ++k)
        {
            const int s = lhsRow[k];
            const std::vector<int>& rhsRow = e[k];
            for (std::size_t j = 0; j < n; ++j)
                outRow[j] += s * rhsRow[j];
        }
    }
}
// Builds an m-by-n test matrix whose entry at (row, col) is row - col - 1.
// The values are arbitrary filler for the multiplication demo.
std::vector<std::vector<int> > createMat(int m, int n) {
    std::vector<std::vector<int> > mat(m, std::vector<int>(n, 0));
    int rowIdx = 0;
    for (auto& row : mat) {
        int colIdx = 0;
        for (auto& cell : row) {
            cell = rowIdx - colIdx - 1;
            ++colIdx;
        }
        ++rowIdx;
    }
    return mat;
}
int main()
{
clock_t startTime, endTime;
// initializing matrices
d = createMat(80, 50);
e = createMat(50, 22);
// f (m*n) = d (m*t) * e (t*n)
int m = d.size();
int t = d[0].size();
int n = e[0].size();
f.resize(m);
for (int i = 0; i < m; ++i)
f[i].resize(n);
//single thread
startTime = clock();
mulSingle();
endTime = clock();
//display
for (auto cur : f)
{
for (auto i : cur)
cout << i << " ";
cout << endl;
}
cout << "Single Thread Total Time : " << (double)(endTime - startTime)\
/ CLOCKS_PER_SEC << " s" << endl;
// initializing matrix
f.clear();
f.resize(m);
for (int i = 0; i < m; ++i)
f[i].resize(n);
cout << endl;
//multiple thread
startTime = clock();
int div = m / 4;
thread t1(mulMulti, 0, div);
thread t2(mulMulti, div, 2 * div);
thread t3(mulMulti, 2 * div, 3 * div);
thread t4(mulMulti, 3 * div, m);
t1.join();
t2.join();
t3.join();
t4.join();
endTime = clock();
//display
for (auto cur : f)
{
for (auto i : cur)
cout << i << " ";
cout << endl;
}
cout << "Multiple Thread Total Time : " << (double)(endTime - startTime)\
/ CLOCKS_PER_SEC << " s" << endl;
//this_thread::sleep_for(chrono::seconds(5));
system("pause");
return 0;
}
下图是单线程运算时间:0.063s
下图是多线程运算时间:0.041s
把矩阵的行列数设得更大一些,多线程的加速效果会更明显。