简述
会用多个版本来写。操作是用命令行来操作。
除了第二版本是在内容上基于第一个版本的完善,其他都是降低算法的复杂程度的,可以放心阅读
版本1
- 要求n被thread_count整除 不然可以会有计算错误。
#include <iostream>
#include <omp.h>
#define FUN(x) (x * x)
using namespace std;
#pragma warning(disable : 4996)
void Trap(double a, double b, int n, double *global_result_p);
int main(int argc, char **argv) {
if (argc < 5) return 0;
double global_result = 0.0;
double a, b;
int n;
int thread_count = strtol(argv[1], NULL, 10);
a = strtod(argv[2], NULL);
b = strtod(argv[3], NULL);
n = strtol(argv[4], NULL, 10);
// n 必须被 thread_count 整除
#pragma omp parallel num_threads (thread_count)
Trap(a, b, n, &global_result);
cout << "With n = " << n << " trapzoids, our estimate\n";
cout << "of the integral from " << a << " to " << b <<" = "<< global_result<< endl;
}
void Trap(double a, double b, int n, double *global_result_p) {
double h, x, my_result;
double local_a, local_b;
int i, local_n;
int my_rank = omp_get_thread_num();
int thread_count = omp_get_num_threads();
h = (b - a) / n;
local_n = n / thread_count; // same in all threads
local_a = a + my_rank * local_n * h;
local_b = local_a + local_n * h;
x = local_a; // initial x
my_result = (FUN(local_a) + FUN(local_b)) / 2;
for (i = 1; i < local_n; ++i) {
x += h;
my_result += FUN(x);
}
my_result *= h;
# pragma omp critical
*global_result_p += my_result;
}
编译好源文件之后,就用下面的命令执行
- 这里是用4个线程,来计算在0,1区间上的 x 2 x^2 x2的用n=64划分的梯形积分公式。结果接近1/3 可以通过提高n来提高精度。但这个版本只能计算n为thread_count的整数倍的时候
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 64
With n = 64 trapzoids, our estimate
of the integral from 0 to 1 = 0.333374
版本2
- 可以使用不被thread_count整除的n
- 改进代码: 添加了一个变量left表示剩余。剩余的让前left个线程每个再多算点。(一般来说线程数目都会比n要小很多。(不然在算法上就会不精确(n不够大的话)。))
left = n % thread_count;
if (my_rank < left) local_n += 1;
if (my_rank < left){ local_a = a + my_rank * local_n * h; }
else { local_a = a + left * (local_n + 1) * h + (my_rank - left) * local_n * h; }
实际的代码:
#include <iostream>
#include <omp.h>
#define FUN(x) (x * x)
using namespace std;
#pragma warning(disable : 4996)
void Trap(double a, double b, int n, double *global_result_p);
int main(int argc, char **argv) {
if (argc < 5) return 0;
double global_result = 0.0;
double a, b;
int n;
int thread_count = strtol(argv[1], NULL, 10);
a = strtod(argv[2], NULL);
b = strtod(argv[3], NULL);
n = strtol(argv[4], NULL, 10);
#pragma omp parallel num_threads (thread_count)
Trap(a, b, n, &global_result);
cout << "With n = " << n << " trapzoids, our estimate\n";
cout << "of the integral from " << a << " to " << b <<" = "<< global_result<< endl;
}
void Trap(double a, double b, int n, double *global_result_p) {
double h, x, my_result;
double local_a, local_b;
int i, local_n, left;
int my_rank = omp_get_thread_num();
int thread_count = omp_get_num_threads();
h = (b - a) / n;
local_n = n / thread_count; // same in all threads
left = n % thread_count;
if (my_rank < left) local_n += 1;
if (my_rank < left){ local_a = a + my_rank * local_n * h; }
else { local_a = a + left * (local_n + 1) * h + (my_rank - left) * local_n * h; }
local_b = local_a + local_n * h;
x = local_a; // initial x
my_result = (FUN(local_a) + FUN(local_b)) / 2;
for (i = 1; i < local_n; ++i) {
x += h;
my_result += FUN(x);
}
my_result *= h;
# pragma omp critical
*global_result_p += my_result;
}
发现精度不断的提升,在只改变n的数目的情况下。
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 64
With n = 64 trapzoids, our estimate
of the integral from 0 to 1 = 0.333374
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 65
With n = 65 trapzoids, our estimate
of the integral from 0 to 1 = 0.333373
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 66
With n = 66 trapzoids, our estimate
of the integral from 0 to 1 = 0.333372
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 67
With n = 67 trapzoids, our estimate
of the integral from 0 to 1 = 0.33337
版本3
- 对于部分对指针不太熟的人来说,我们可以做出下面的改进
- 修改部分:
#pragma omp parallel num_threads (thread_count)
{
double my_result = Trap(a, b, n);
#pragma omp critical
global_result += my_result;
}
#include <iostream>
#include <omp.h>
#define FUN(x) (x * x)
using namespace std;
#pragma warning(disable : 4996)
double Trap(double a, double b, int n);
int main(int argc, char **argv) {
if (argc < 5) return 0;
double global_result = 0.0;
double a, b;
int n;
int thread_count = strtol(argv[1], NULL, 10);
a = strtod(argv[2], NULL);
b = strtod(argv[3], NULL);
n = strtol(argv[4], NULL, 10);
#pragma omp parallel num_threads (thread_count)
{
double my_result = Trap(a, b, n);
#pragma omp critical
global_result += my_result;
}
cout << "With n = " << n << " trapzoids, our estimate\n";
cout << "of the integral from " << a << " to " << b <<" = "<< global_result<< endl;
}
double Trap(double a, double b, int n) {
double h, x, my_result;
double local_a, local_b;
int i, local_n, left;
int my_rank = omp_get_thread_num();
int thread_count = omp_get_num_threads();
h = (b - a) / n;
local_n = n / thread_count; // same in all threads
left = n % thread_count;
if (my_rank < left) local_n += 1;
if (my_rank < left){ local_a = a + my_rank * local_n * h; }
else { local_a = a + left * (local_n + 1) * h + (my_rank - left) * local_n * h; }
local_b = local_a + local_n * h;
x = local_a; // initial x
my_result = (FUN(local_a) + FUN(local_b)) / 2;
for (i = 1; i < local_n; ++i) {
x += h;
my_result += FUN(x);
}
my_result *= h;
return my_result;
}
结果
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 67
With n = 67 trapzoids, our estimate
of the integral from 0 to 1 = 0.33337
PS D:\C++\VS\repo\OpenMP-TEST\Debug> ./OpenMP-TEST 4 0 1 68
With n = 68 trapzoids, our estimate
of the integral from 0 to 1 = 0.333369
PS D:\C++\VS\repo\OpenMP-TEST\Debug>
版本4
- 使用规约变量和规约操作
- 会让代码更加简单
#pragma omp parallel num_threads (thread_count) reduction(+:global_result)
{
global_result += Trap(a, b, n);
}
- 跟版本3是一样的。就是让代码更加简单了而已,用了map-reduce的思路
reduction(<operator>: <variable list>)
前面的放操作符,后面的设置变量。- 但是有需要注意的,如果是减法就需要小心了。在每个线程上确实是使用了减法,但是全局上也是可能做了减法,(例如
-1 - (-5)= 4
)这样的事情是有可能发生的。一般来说,满足交换律的都不用担心这点hh