基本思想:OpenMP 通过 reduction 子句对变量进行归约处理。C/C++ 中 reduction 可以支持的运算符为 +、-、*、&、|、^、&&、||(OpenMP 3.1 起还支持 min、max);注意除法 / 不是合法的归约运算符。
#pragma omp parallel for reduction(+:sum)
for(int i=0;i<num;i++)
{
....
}
测试代码
#include <iostream>
#include <omp.h>
#include<chrono>
using namespace std;
using namespace chrono;
/// Serial baseline: adds the integers 0 .. num-1 on the calling thread.
///
/// After every addition the running total and the id returned by
/// omp_get_thread_num() are printed; the final total is printed once at the end.
///
/// @param num  number of terms to add (the loop body runs for i = 0 .. num-1).
void sequentialProgram(int num)
{
    int total = 0;
    int term = 0;
    while (term < num) {
        total += term;
        printf("sum=%d the current thread id: %d\n", total, omp_get_thread_num());
        ++term;
    }
    std::cout << "sum=" << total << std::endl;
}
/// Parallel counterpart of sequentialProgram(): adds 0 .. num-1 with an
/// OpenMP `parallel for` whose accumulator is combined via reduction(+).
///
/// Inside the loop `total` names the thread's private reduction copy, so the
/// value printed per iteration is that thread's partial sum, not the global one.
/// The combined result is printed after the loop.
///
/// @param num  number of terms to add (the loop body runs for i = 0 .. num-1).
void parallelProgram(int num) {
    int total = 0;
    #pragma omp parallel for reduction(+:total)
    for (int idx = 0; idx < num; ++idx)
    {
        total += idx;
        printf("i=%d sum=%d the current thread id: %d\n", idx, total, omp_get_thread_num());
    }
    std::cout << " sum=" << total << std::endl;
}
int main() {
int num=omp_get_num_procs();//12
auto start_time=std::chrono::steady_clock::now();
sequentialProgram(num);
auto end_time=std::chrono::steady_clock::now();
std::cout<<"sequentialProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
start_time=std::chrono::steady_clock::now();
parallelProgram(num);
end_time=std::chrono::steady_clock::now();
std::cout<<"parallelProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
return 0;
}
测试结果:可以看出,在增加归约之后,结果和串行程序保持一致;随着叠加次数增加,其耗时明显比串行程序少。
F:\OpenMP\cmake-build-debug\OpenMP.exe
sum=0 the current thread id: 0
sum=1 the current thread id: 0
sum=3 the current thread id: 0
sum=6 the current thread id: 0
sum=10 the current thread id: 0
sum=15 the current thread id: 0
sum=21 the current thread id: 0
sum=28 the current thread id: 0
sum=36 the current thread id: 0
sum=45 the current thread id: 0
sum=55 the current thread id: 0
sum=66 the current thread id: 0
sum=66
sequentialProgram elapse time: 0.0376821 seconds
i=1 sum=1 the current thread id: 1
i=8 sum=8 the current thread id: 8
i=9 sum=9 the current thread id: 9
i=4 sum=4 the current thread id: 4
i=2 sum=2 the current thread id: 2
i=7 sum=7 the current thread id: 7
i=10 sum=10 the current thread id: 10
i=6 sum=6 the current thread id: 6
i=5 sum=5 the current thread id: 5
i=0 sum=0 the current thread id: 0
i=3 sum=3 the current thread id: 3
i=11 sum=11 the current thread id: 11
sum=66
parallelProgram elapse time: 0.0163177 seconds
Process finished with exit code 0
同样的逻辑代码
shared:声明变量在所有线程之间共享,各线程读写的是同一块内存。shared 本身不提供任何同步,多个线程同时写入时仍需配合 atomic、critical 或锁,否则会产生数据竞争。
#pragma omp parallel for shared(sum)
for(int i=0;i<num;i++)
{
.....
}
simd 编译器可以忽略矢量依赖关系,使循环尽可能实现矢量友好,并尊重用户对同时执行多个循环迭代的意图。如果循环体内存在跨迭代的累加(例如 sum=sum+i),需要同时写上 reduction 子句(如 reduction(+:sum)),否则结果没有保证。
#pragma omp simd
for(int i=0;i<num;i++)
{
......
}
测试代码
#include <iostream>
#include <omp.h>
#include<chrono>
using namespace std;
using namespace chrono;
// File-level OpenMP lock; parallelProgram() initialises it, uses it to guard
// the shared accumulator in its first loop, and destroys it afterwards.
static omp_lock_t m_lock;
/// Serial baseline: adds the integers 0 .. num-1 and prints "sum=<total>".
///
/// @param num  number of terms to add; nothing is added when num <= 0.
void sequentialProgram(int num)
{
    int total = 0;
    for (int term = 0; term < num; ++term) {
        total += term;
        // Per-step tracing is intentionally disabled in this version.
    }
    std::cout << "sum=" << total << std::endl;
}
/**
 * Computes the sum 0 + 1 + ... + (num-1) six times, each time with a
 * different OpenMP technique, and prints every result as " sum=<value>".
 *
 * Techniques, in order: runtime lock, named critical section, atomic update,
 * explicit shared clause (with atomic update), reduction clause, and simd
 * with a reduction clause. All six are expected to print the same value.
 *
 * @param num  number of terms to add (each loop runs for i = 0 .. num-1).
 */
void parallelProgram(int num) {
    int sum = 0;

    // 1) Explicit lock: only the thread holding m_lock may update sum.
    omp_init_lock(&m_lock);
    #pragma omp parallel for
    for (int i = 0; i < num; i++)
    {
        omp_set_lock(&m_lock);
        sum = sum + i;
        omp_unset_lock(&m_lock);
    }
    std::cout << " sum=" << sum << std::endl;
    omp_destroy_lock(&m_lock);

    // 2) Named critical section: one thread at a time executes the update.
    sum = 0;
    #pragma omp parallel for
    for (int i = 0; i < num; i++)
    {
        #pragma omp critical(sum)
        sum = sum + i;
    }
    std::cout << " sum=" << sum << std::endl;

    // 3) Atomic update. The compound-assignment form is used because it is
    //    accepted by every OpenMP version.
    sum = 0;
    #pragma omp parallel for
    for (int i = 0; i < num; i++)
    {
        #pragma omp atomic
        sum += i;
    }
    std::cout << " sum=" << sum << std::endl;

    // 4) Explicit shared clause. shared(sum) only states that all threads
    //    access the same variable; it does not synchronise anything, so the
    //    read-modify-write must still be made atomic to avoid a data race.
    sum = 0;
    #pragma omp parallel for shared(sum)
    for (int i = 0; i < num; i++)
    {
        #pragma omp atomic
        sum += i;
    }
    std::cout << " sum=" << sum << std::endl;

    // 5) Reduction: each thread accumulates into a private copy and the
    //    copies are combined with + when the loop ends.
    sum = 0;
    #pragma omp parallel for reduction(+:sum)
    for (int i = 0; i < num; i++)
    {
        sum = sum + i;
    }
    std::cout << " sum=" << sum << std::endl;

    // 6) SIMD: sum is carried from one iteration to the next, so the loop
    //    may only be vectorised when sum is declared as a reduction variable.
    sum = 0;
    #pragma omp simd reduction(+:sum)
    for (int i = 0; i < num; i++)
    {
        sum = sum + i;
    }
    std::cout << " sum=" << sum << std::endl;
}
int main() {
int num=omp_get_num_procs();//12
auto start_time=std::chrono::steady_clock::now();
sequentialProgram(num);
auto end_time=std::chrono::steady_clock::now();
std::cout<<"sequentialProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
start_time=std::chrono::steady_clock::now();
parallelProgram(num);
end_time=std::chrono::steady_clock::now();
std::cout<<"parallelProgram elapse time: "<<std::chrono::duration<double>(end_time-start_time).count()<<" seconds"<<std::endl;
return 0;
}
测试结果
F:\OpenMP\cmake-build-debug\OpenMP.exe
sum=66
sequentialProgram elapse time: 0.0011415 seconds
sum=66
sum=66
sum=66
sum=66
sum=66
sum=66
parallelProgram elapse time: 0.0028554 seconds
Process finished with exit code 0