给定两个矩阵 A 和 B,大小均为 1024 × 1024,计算它们的乘积 C = A × B。先使用单线程进行计算,代码如下:
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#define ORDER 1024
#define AVAL 3.0
#define BVAL 5.0
#define TOL 0.001
int main(int argc, char *argv[])
{
int Ndim = ORDER, Pdim = ORDER, Mdim = ORDER;
double *A, *B, *C, tmp;
double start, end;
A = (double *)malloc(Ndim * Pdim * sizeof(double));
B = (double *)malloc(Pdim * Mdim * sizeof(double));
C = (double *)malloc(Ndim * Mdim * sizeof(double));
for (int i = 0; i < Ndim; i++)
for (int j = 0; j < Pdim; j++)
*(A + (i * Ndim + j)) = AVAL;
for (int i = 0; i < Pdim; i++)
for (int j = 0; j < Mdim; j++)
*(B + (i * Pdim + j)) = BVAL;
for (int i = 0; i < Ndim; i++)
for (int j = 0; j < Mdim; j++)
*(C + (i * Ndim + j)) = 0.0;
start = omp_get_wtime();
for (int i = 0; i < Ndim; i++)
{
for (int j = 0; j < Mdim; j++)
{
tmp = 0.0;
for (int k = 0; k < Pdim; k++)
{
tmp += *(A + (i * Ndim + k)) * *(B + (k * Pdim + j));
}
*(C + (i * Ndim + j)) = tmp;
}
}
end = omp_get_wtime();
printf("Order %d multiplication in %f seconds \n", ORDER, end - start);
free(A);
free(B);
free(C);
return 0;
}
单线程版本的执行时间为 63.432267 秒。
再使用 OpenMP 将最外层循环并行化后计算一次,代码如下:
#include <stdio.h>
#include <stdlib.h>
#include <omp.h>
#define ORDER 1024
#define AVAL 3.0
#define BVAL 5.0
#define TOL 0.001
int main(int argc, char *argv[])
{
int Ndim = ORDER, Pdim = ORDER, Mdim = ORDER;
double *A, *B, *C, tmp;
double start, end;
A = (double *)malloc(Ndim * Pdim * sizeof(double));
B = (double *)malloc(Pdim * Mdim * sizeof(double));
C = (double *)malloc(Ndim * Mdim * sizeof(double));
for (int i = 0; i < Ndim; i++)
for (int j = 0; j < Pdim; j++)
*(A + (i * Ndim + j)) = AVAL;
for (int i = 0; i < Pdim; i++)
for (int j = 0; j < Mdim; j++)
*(B + (i * Pdim + j)) = BVAL;
for (int i = 0; i < Ndim; i++)
for (int j = 0; j < Mdim; j++)
*(C + (i * Ndim + j)) = 0.0;
start = omp_get_wtime();
#pragma omp parallel for
for (int i = 0; i < Ndim; i++)
{
for (int j = 0; j < Mdim; j++)
{
double tmp = 0.0;
for(int k = 0; k < Pdim; k++)
{
tmp += *(A + (i * Ndim + k)) * *(B + (k * Pdim + j));
}
*(C + (i * Ndim + j)) = tmp;
}
}
end = omp_get_wtime();
printf("Order %d multiplication in %f seconds \n", ORDER, end - start);
free(A);
free(B);
free(C);
return 0;
}
使用 4 线程计算后,执行时间为 19.173928 秒,相对单线程版本的加速比约为 3.3 倍。