11111

m0_56074653

于 2023-05-19 14:01:55 发布

阅读量44

点赞数

文章标签： c++ windows 算法

本文链接：https://blog.csdn.net/m0_56074653/article/details/130765421

版权

#include "stdio.h"
#include "stdlib.h"
#include<math.h>
#include<time.h>
#include<ctime>
#include "omp.h"
#include "windows.h"

// Matrix dimensions. The multiplication loops index b[k][j] with k < COL1,
// so COL1 must not exceed ROW2 (they are equal here).
#define ROW1 256 // number of rows of the left matrix
#define COL1 256 // number of columns of the left matrix
#define ROW2 256 // number of rows of the right matrix
#define COL2 256 // number of columns of the right matrix
#define MAX_THREADS 128 // largest thread count tried by autoTuning()

// Left operand, right operand and result of the product c = a * b.
static int a[ROW1][COL1];
static int b[ROW2][COL2];
static int c[ROW1][COL2];
// Performance-counter frequency; set once in main() from
// QueryPerformanceFrequency() and used by cost_time_s() as the divisor.
double dqFreq;

// Fill a and b with pseudo-random values and zero c.
void initArray();
// Zero the result matrix c.
void init_c_array();
// NOTE(review): declared here, but no definition or call appears in the
// visible part of the file - confirm whether it is still needed.
void print(int* array, int row, int col);
// Read the current performance-counter value.
LARGE_INTEGER now_time_count();
// Convert a pair of counter readings into elapsed seconds.
double cost_time_s(LARGE_INTEGER start_time_count, LARGE_INTEGER end_time_count);
// Time the product for a range of thread counts and return the fastest one.
int autoTuning();
int main()
{
   omp_set_num_threads(MAX_THREADS);
   LARGE_INTEGER f;
   QueryPerformanceFrequency(&f);
   dqFreq = (double)f.QuadPart;
   initArray();
   autoTuning();
   return 0;
}
/**
 * Initialise the operand matrices and the result matrix.
 *
 * Seeds rand() with the current time, then fills a and afterwards b, both in
 * row-major order, with values in the range 0..99. Finally sets every
 * element of c to zero.
 */
void initArray()
{
    srand((unsigned int)time(NULL));

    /* Left operand. */
    for (int row = 0; row < ROW1; ++row)
    {
        int *dst = a[row];
        for (int col = 0; col < COL1; ++col)
        {
            dst[col] = rand() % 100;
        }
    }

    /* Right operand. */
    for (int row = 0; row < ROW2; ++row)
    {
        int *dst = b[row];
        for (int col = 0; col < COL2; ++col)
        {
            dst[col] = rand() % 100;
        }
    }

    /* Result matrix starts out as all zeros. */
    for (int row = 0; row < ROW1; ++row)
    {
        int *dst = c[row];
        for (int col = 0; col < COL2; ++col)
        {
            dst[col] = 0;
        }
    }
}
/**
 * Reset the result matrix: every element of c is set to zero.
 */
void init_c_array()
{
    int row = 0;
    while (row < ROW1)
    {
        int *cells = c[row];
        int col = 0;
        while (col < COL2)
        {
            cells[col] = 0;
            ++col;
        }
        ++row;
    }
}
/** 函数作用：
* 函数输入参数：
* 函数返回值：
* 补充：
*/
int autoTuning()
{
   LARGE_INTEGER start_time_count_serial = now_time_count();
   init_c_array();
   for (int i = 0; i < ROW1; i++)
       for (int j = 0; j < COL2; j++)
           for (int k = 0; k < COL1; k++)
               c[i][j] += a[i][k] * b[k][j];
   LARGE_INTEGER end_time_count_serial = now_time_count();
   double time_cost_s = cost_time_s(start_time_count_serial, end_time_count_serial);
   printf("进行串行计算：大矩阵相乘总用时%15.13fs\n", time_cost_s);
   double min_time_cost_s = 1.79769313486231570E+308; int min_time_threads = 0;
   for (int num_threads = 8; num_threads <= MAX_THREADS; num_threads++)
   {
       init_c_array();
       LARGE_INTEGER start_time_count = now_time_count();
       omp_set_num_threads(num_threads);
#pragma omp parallel for collapse(2)
       for (int i = 0; i < ROW1; i++)
           for (int j = 0; j < COL2; j++)
               for (int k = 0; k < COL1; k++)
                   c[i][j] += a[i][k] * b[k][j];

       LARGE_INTEGER end_time_count = now_time_count();
       double time_cost_s = cost_time_s(start_time_count, end_time_count);
       printf("线程数：%3d，大矩阵相乘总用时%15.13fs\n", num_threads, time_cost_s);
       if (time_cost_s < min_time_cost_s) {
           min_time_cost_s = time_cost_s;
           min_time_threads = num_threads;
       }
   }
   printf("%d行%d列矩阵与%d行%d列矩阵相乘\n处理器数%d\n大矩阵相乘最佳线程数为：%d\n总用时%15.13fs\n", ROW1, COL1, ROW2, COL2, omp_get_num_procs(), min_time_threads, min_time_cost_s);
   return min_time_threads;
}
/**
 * Read the performance counter; call once at the start and once at the end
 * of the region being timed.
 *
 * @return the counter value filled in by QueryPerformanceCounter().
 *
 * Note: omp_get_wtime() could be used for timing instead.
 */
LARGE_INTEGER now_time_count()
{
    LARGE_INTEGER ticks;
    QueryPerformanceCounter(&ticks);
    return ticks;
}
/**
 * Compute the time elapsed between two performance-counter readings.
 *
 * @param start_time_count counter value taken at the start of the interval
 * @param end_time_count   counter value taken at the end of the interval
 * @return the counter difference divided by dqFreq, i.e. the interval in
 *         seconds (dqFreq is set in main() from QueryPerformanceFrequency()).
 *
 * Note: omp_get_wtime() could be used for timing instead.
 */
double cost_time_s(LARGE_INTEGER start_time_count, LARGE_INTEGER end_time_count)
{
    const double elapsed_ticks =
        (double)(end_time_count.QuadPart - start_time_count.QuadPart);
    return elapsed_ticks / dqFreq;
}