题目
利用并行求解
思路
高斯积分
随着高斯点个数的增加,精度增加;
梯形逼近
采用微积分的基本概念,采用无限小梯形去逼近形状
利用OpenMP多线程加速
C++实现
头文件
#include <math.h>
#include <iostream>
#include <iomanip>
#include <omp.h>
#include <time.h>
#define NUM_THREADS 8
高斯积分
void func(const double& x, const double& w, double& y){
y += w*2. / (1. + x * x);
}
void Gauss(const int& n, double* s, double* w){
if (n == 1) {
s[0] = 0.; w[0] = 2.;
}
if (n == 2) {
s[0] = -0.57735; s[1] = 0.57735;
w[0] = 1.; w[1] = 1.;
}
else if (n == 3) {
s[0] = -0.7745967; s[1] = 0.; s[2] = 0.7745967;
w[0] = 0.5555556; w[1] = 0.8888889; w[2] = 0.5555556;
}
else if (n == 4) {
s[0] = -0.8611363; s[1] = -0.3399810; s[2] = 0.3399810; s[3] = 0.8611363;
w[0] = 0.3478548; w[1] = 0.6521452; w[2] = 0.6521452; w[3] = 0.3478548;
}
}
auto Guass = [&](const int& n){
double *s = new double[n];
double *w = new double[n];
Gauss(n, s, w);
double sum = 0.;
for (int i = 0; i < n; i++) {
func(s[i], w[i], sum);
}
std::cout << "pi = " << sum << std::endl;
delete[] s;
delete[] w;
};
梯形逼近
double func(const double& x){
return 4. / (1. + x * x);
}
double dTrape(const double& x1, const double& x2){
double y1 = func(x1);
double y2 = func(x2);
return (y1 + y2)*(x2 - x1) / 2.;
}
单线程
auto TrapeSingle = [&](const int& n){
clock_t Time1 = clock();
double x1,x2;
double total_sum_omp = 0.;
for (int i = 0; i < n; i++) {
x1 = i*1. / n;
x2 = (i + 1)*1. / n;
total_sum_omp += dTrape(x1, x2);
}
clock_t Time2 = clock();
std::cout << "n = " << n << " ,pi = " << std::setprecision(20) << total_sum_omp << std::endl;
std::cout << "Time of Single Thread is: " << (Time2 - Time1) / 1000. << " s " << std::endl;
};
多程
auto TrapeMulti = [&](const int& n, int NumThreads) {
clock_t Time1 = clock();
double total_sum_omp = 0.;
double *local_sum = new double[NumThreads];
for (int i = 0; i < NumThreads; i++)
{
local_sum[i] = 0.;
}
omp_set_num_threads(NumThreads);
#pragma omp parallel for
for (int i = 0; i < n; i++)
{
int k = omp_get_thread_num();
double x1 = i*1. / n;
double x2 = (i + 1)*1. / n;
local_sum[k] += dTrape(x1, x2);
}
for (int i = 0; i < NumThreads; i++)
{
total_sum_omp += local_sum[i];
}
clock_t Time2 = clock();
std::cout << "n = " << n << " ,pi = " << std::setprecision(20) << total_sum_omp << std::endl;
std::cout << "Time of " << NumThreads << " Threads is: " << (Time2 - Time1) / 1000. << " s " << std::endl;
delete[] local_sum;
};
主程序
void main(int argc, char* argv[])
{
auto Guass = [&](const int& n){…………} ;
auto TrapeSingle = [&](const int& n){…………};
auto TrapeMulti = [&](const int& n, int NumThreads) {…………};
Guass(1);
Guass(2);
Guass(3);
Guass(4);
TrapeSingle(10);
TrapeSingle(100);
TrapeSingle(1000);
TrapeSingle(10000);
TrapeSingle(100000);
TrapeSingle(1000000);
TrapeSingle(10000000);
TrapeMulti(10, NUM_THREADS);
TrapeMulti(100, NUM_THREADS);
TrapeMulti(1000, NUM_THREADS);
TrapeMulti(10000, NUM_THREADS);
TrapeMulti(100000, NUM_THREADS);
TrapeMulti(1000000, NUM_THREADS);
TrapeMulti(10000000, NUM_THREADS);
}
结果
利用高斯求解
Num of Guass point = 1 , pi = 4
Num of Guass point = 2 , pi = 3
Num of Guass point = 3 , pi = 3.16667
Num of Guass point = 4 , pi = 3.13726
利用单线程梯形逼近
n = 10 ,pi = 3.1399259889071591
Time of Single Thread is: 0 s
n = 100 ,pi = 3.1415759869231294
Time of Single Thread is: 0 s
n = 1000 ,pi = 3.1415924869231291
Time of Single Thread is: 0 s
n = 10000 ,pi = 3.1415926519231214
Time of Single Thread is: 0.001 s
n = 100000 ,pi = 3.1415926535730865
Time of Single Thread is: 0.0089999999999999993 s
n = 1000000 ,pi = 3.1415926535895733
Time of Single Thread is: 0.085000000000000006 s
n = 10000000 ,pi = 3.1415926535892185
Time of Single Thread is: 0.93799999999999994 s
利用多线程梯形逼近
n = 10 ,pi = 3.1399259889071591
Time of 8 Threads is: 0 s
n = 100 ,pi = 3.1415759869231281
Time of 8 Threads is: 0 s
n = 1000 ,pi = 3.1415924869231264
Time of 8 Threads is: 0 s
n = 10000 ,pi = 3.1415926519231236
Time of 8 Threads is: 0 s
n = 100000 ,pi = 3.1415926535731291
Time of 8 Threads is: 0.0030000000000000001 s
n = 1000000 ,pi = 3.141592653589623
Time of 8 Threads is: 0.029000000000000001 s
n = 10000000 ,pi = 3.1415926535897452
Time of 8 Threads is: 0.255 s
OpenMp使用
-
注意 omp_get_thread_num() 与 omp_get_num_threads() 的区别:
omp_get_thread_num()是返回当前线程编号,及当前运行的线程编号
omp_get_num_threads() 是返回可用的总的线程数,比如计算机可用8个线程,就返回的是8 -
多线程中变量的作用域
#pragma omp parallel for
for (int i = 0; i < n; i++)
{
int k = omp_get_thread_num();
double x1 = i*1. / n;
double x2 = (i + 1)*1. / n;
local_sum[k] += dTrape(x1, x2);
}
上一段代码,如果换成以下形式就会出错
double x1, x2;
#pragma omp parallel for
for (int i = 0; i < n; i++)
{
int k = omp_get_thread_num();
x1 = i*1. / n;
x2 = (i + 1)*1. / n;
local_sum[k] += dTrape(x1, x2);
}
因为x1和x2的作用域要在同一个线程才正确,第二种方式会产生线程间的冲突,所以错误