【openMP并行计算】计算π

最新推荐文章于 2024-05-11 05:03:54 发布

风起猿涌

最新推荐文章于 2024-05-11 05:03:54 发布

阅读量5.1k

点赞数 6

分类专栏：并行计算文章标签：并行计算 openMP

本文链接：https://blog.csdn.net/Gilgame/article/details/93734456

版权

并行计算专栏收录该内容

1 篇文章 0 订阅

订阅专栏

本系列文章均为个人学习笔记

Linux环境对OpenMP的支持：
在Linux上编译和运行OpenMP程序
编译OpenMP程序： gcc -fopenmp a.c
运行OpenMP程序： ./a.out

1. 串行计算π

#include <stdio.h>
#include <omp.h>
static long num_steps = 100000000;	/* number of rectangles used by the midpoint rule */
double step;				/* width of one rectangle; assigned in main() */

/*
 * Serial reference version.
 *
 * Approximates pi as the integral of 4/(1+x^2) over [0,1] with the midpoint
 * rule using num_steps rectangles, then prints the estimate together with
 * the wall-clock time measured by omp_get_wtime().
 *
 * Returns 0.
 */
int main(void)
{
	long i;		/* same type as num_steps, so the counter cannot overflow before the bound is reached */
	double x, pi, sum = 0.0;
	double start_time, run_time;

	step = 1.0 / (double)num_steps;

	start_time = omp_get_wtime();

	for (i = 1; i <= num_steps; i++) {
		x = (i - 0.5) * step;			/* midpoint of the i-th rectangle */
		sum = sum + 4.0 / (1.0 + x * x);
	}

	pi = step * sum;
	run_time = omp_get_wtime() - start_time;
	printf("\n pi with %ld steps is %lf in %lf seconds\n ", num_steps, pi, run_time);

	return 0;
}

结果：
在这里插入图片描述

2. 使用并行域并行化的程序:

#include <stdio.h>
#include <omp.h>

#define MAX_THREADS 4

static long num_steps = 100000000;   /* number of rectangles used by the midpoint rule */
double step;                         /* width of one rectangle; assigned in main() */

/*
 * Parallel-region version with one partial-sum slot per thread.
 *
 * For each requested team size j = 1..MAX_THREADS the integral of
 * 4/(1+x^2) over [0,1] is evaluated with the iterations dealt out
 * cyclically (thread id handles id, id+numthreads, ...). Every thread
 * accumulates into sum[id]; the slots are added up serially afterwards.
 *
 * Returns 0.
 */
int main(void)
{
   int i, j;
   int nthreads = 0;   /* team size actually used by the last parallel region */
   double pi, full_sum;
   double start_time, run_time;
   double sum[MAX_THREADS];

   step = 1.0 / (double)num_steps;

   for (j = 1; j <= MAX_THREADS; j++) {

      omp_set_num_threads(j);
      start_time = omp_get_wtime();

      #pragma omp parallel   /* start of the parallel region: every thread of the team runs this block */
      {
         long k;
         int id = omp_get_thread_num();
         int numthreads = omp_get_num_threads();
         double x;

         sum[id] = 0.0;

         if (id == 0) {   /* exactly one thread reports and records the team size */
            /*
             * The runtime may grant fewer threads than requested, so the
             * real team size is remembered for the summation below.
             */
            nthreads = numthreads;
            printf(" num_threads = %d", numthreads);
         }

         /*
          * NOTE: neighbouring sum[] slots may share a cache line, so these
          * per-iteration writes can cause false sharing between threads.
          */
         for (k = id; k < num_steps; k += numthreads) {
            x = (k + 0.5) * step;   /* midpoint of rectangle k */
            sum[id] = sum[id] + 4.0 / (1.0 + x * x);
         }
      }

      /* Add only the slots that were written by a thread of this team. */
      for (full_sum = 0.0, i = 0; i < nthreads; i++) {
         full_sum += sum[i];
      }

      pi = step * full_sum;
      run_time = omp_get_wtime() - start_time;
      printf("\n pi is %f in %f seconds %d thrds \n", pi, run_time, nthreads);
   }

   return 0;
}
//共4个线程参加计算，其中线程0进行迭代步0,4,...线程1进行迭代步1,5,....

结果：
在这里插入图片描述
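我们发现并行版本反而比串行版本耗时更多，原因是伪共享（False sharing）：各线程频繁写入的 sum[id] 在内存中相邻，很可能位于同一条缓存行（cache line）上，任一线程的写入都会使其他核心上的这条缓存行失效，缓存行因此在各核心之间来回传递。（更多细节可自行搜索）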

3. 使用 private 子句和 critical 制导语句并行化：

#include <stdio.h>
#include <omp.h>

#define MAX_THREADS 4

static long num_steps = 100000000;	/* number of rectangles used by the midpoint rule */
double step;				/* width of one rectangle; assigned in main() */

/*
 * Parallel version using a private clause and a critical section.
 *
 * For each requested team size j = 1..MAX_THREADS the iterations are dealt
 * out cyclically (thread id handles id, id+numthreads, ...). Each thread
 * accumulates into its own local partial_sum and adds it to the shared
 * full_sum once, inside a critical section.
 *
 * Returns 0.
 */
int main(void)
{
	long i;		/* same type as num_steps; made per-thread by private(i) below */
	int j;
	double pi, full_sum = 0.0;
	double start_time, run_time;

	step = 1.0 / (double)num_steps;

	for (j = 1; j <= MAX_THREADS; j++) {
		omp_set_num_threads(j);
		full_sum = 0.0;
		start_time = omp_get_wtime();

		#pragma omp parallel private(i)	/* each thread gets its own copy of i */
		{
			int id = omp_get_thread_num();
			int numthreads = omp_get_num_threads();
			double x;
			double partial_sum = 0.0;	/* thread-local accumulator */

			#pragma omp single	/* only one thread prints the team size */
			{
				printf(" num_threads = %d", numthreads);
			}

			for (i = id; i < num_steps; i += numthreads) {
				x = (i + 0.5) * step;	/* midpoint of rectangle i */
				partial_sum += 4.0 / (1.0 + x * x);
			}

			#pragma omp critical	/* only one thread at a time may update full_sum */
			{
				full_sum += partial_sum;
			}
		}

		pi = step * full_sum;
		run_time = omp_get_wtime() - start_time;
		printf("\n pi is %f in %f seconds %d threds \n ", pi, run_time, j);
	}

	return 0;
}
//共4个线程参加计算，其中线程0进行迭代步0,4....，线程1进行迭代步1,5....

结果：
在这里插入图片描述
速度明显快了很多！

4. 使用并行归约（reduction）并行化:

#include <stdio.h>
#include <omp.h>
#define MAX_THREADS 4			/* largest team size that is benchmarked */

static long num_steps = 100000000;	/* number of rectangles used by the midpoint rule */
double step;				/* width of one rectangle; assigned in main() */

/*
 * Parallel version using a work-shared loop with a reduction.
 *
 * For each requested team size 1..MAX_THREADS the integral of 4/(1+x^2)
 * over [0,1] is evaluated with "omp for reduction(+:sum)", and the estimate
 * is printed together with the wall-clock time from omp_get_wtime().
 *
 * Returns 0.
 */
int main(void)
{
	int requested;	/* team size requested for the current run */
	double pi, sum = 0.0;
	double start_time, run_time;

	step = 1.0 / (double)num_steps;

	for (requested = 1; requested <= MAX_THREADS; requested++) {
		sum = 0.0;
		omp_set_num_threads(requested);
		start_time = omp_get_wtime();

		#pragma omp parallel
		{
			#pragma omp single	/* only one thread prints the team size */
			{
				printf(" num_threads = %d", omp_get_num_threads());
			}

			/*
			 * Every thread accumulates into a private copy of sum; the
			 * copies are combined with + into the shared sum at the end
			 * of the loop.
			 */
			#pragma omp for reduction(+:sum)
			for (long i = 1; i <= num_steps; i++) {
				/*
				 * x is declared inside the loop body so that each thread
				 * has its own; a single x shared by the team would be
				 * written concurrently by all threads.
				 */
				double x = (i - 0.5) * step;	/* midpoint of the i-th rectangle */
				sum = sum + 4.0 / (1.0 + x * x);
			}
		}

		pi = step * sum;
		run_time = omp_get_wtime() - start_time;
		printf("\n pi is %f in %f seconds and %d threads\n", pi, run_time, requested);
	}

	return 0;
}
//使用4个线程时，若采用 static 调度（常见的默认实现），迭代被划分为连续的块：线程0执行迭代步 1~25000000，线程1执行迭代步 25000001~50000000，依此类推。

结果：
在这里插入图片描述
速度一般般

风起猿涌

关注

6
点赞
踩
64

收藏

觉得还不错? 一键收藏
0
评论
【openMP并行计算】计算π

Linux环境对OpenMP的支持：在Linux上编译和运行OpenMP程序编译OpenMP程序： gcc -fopenmp a.c运行OpenMP程序： ./a.out1. 串行计算π#include <stdio.h>#include <omp.h>static long num_steps = 100000000;double step;int ma...
复制链接

扫一扫

专栏目录