多核异构并行计算OpenMP---并行控制
指令parallel
/*File:parallel.cpp*/
#include<stdio.h>
#include<omp.h>
/*
 * Reports the OpenMP execution state three times: before a parallel
 * region, inside it (once per thread), and after it. Each report prints
 * the calling thread's id and the value of omp_get_num_threads().
 */
int main(){
    int thread_id, team_size;

    /* Phase 1: executed before any parallel region has been opened. */
    team_size = omp_get_num_threads();  /* number of threads currently running */
    thread_id = omp_get_thread_num();   /* id of the thread currently running */

    /* omp_in_parallel() tells whether the code runs in parallel or serially. */
    if(!omp_in_parallel()){
        printf("In the serial region! id=%d ",thread_id);
        printf("Number of threads:%d\n",team_size);
    }else{
        printf("In the parallel region! id=%d ",thread_id);
        printf("Number of threads:%d\n",team_size);
    }
    printf("-----before parallel region");
    printf("\n");
    printf("\n");

    /*
     * Phase 2: everything above ran serially; the block below is the
     * parallel region. private(...) means every variable in the list is
     * private to each thread, i.e. each thread works on its own copy.
     */
    #pragma omp parallel private(thread_id,team_size)
    {
        team_size = omp_get_num_threads();  /* number of threads */
        thread_id = omp_get_thread_num();   /* thread id */
        /* The two printf calls are kept separate, exactly as in the serial phases. */
        if(!omp_in_parallel()){
            printf("in the serial region!id=%d ",thread_id);
            printf("number of threads:%d\n",team_size);
        }else{
            printf("in the parallel region! id=%d ",thread_id);
            printf("number of threads:%d\n",team_size);
        }
    }
    printf("\n");
    printf("-----after parallel region\n");

    /* Phase 3: the parallel region has ended; serial code runs again. */
    team_size = omp_get_num_threads();
    thread_id = omp_get_thread_num();
    if(!omp_in_parallel()){
        printf("in the serial region!id=%d ",thread_id);
        printf("number of threads:%d\n",team_size);
    }else{
        printf("in the parallel region! id=%d ",thread_id);
        printf("number of threads:%d\n",team_size);
    }
    return 0;
}
//运行结果
$ ./parallel.out
In the serial region! id=0 Number of threads:1
-----before parallel region
in the parallel region! id=7 in the parallel region! id=2 in the parallel region! id=1 number of threads:8
in the parallel region! id=0 number of threads:8
in the parallel region! id=5 number of threads:8
number of threads:8
in the parallel region! id=3 number of threads:8
number of threads:8
in the parallel region! id=6 number of threads:8
in the parallel region! id=4 number of threads:8
// 由于在前面parallel定义的并行区域内线程数没有显式声明,因此并行区域内运行的线程数量等于硬件系统所能提供的最大线程数
// 线程组内子线程号从0到7,而且执行顺序是随机的
-----after parallel region
in the serial region!id=0 number of threads:1
//退出并行区域后,串行区代码段采用单线程方式执行,线程号为0
设定线程数量
对并行区域设置线程数量是必不可少的关键步骤,通常有四种途径:
- 默认方式
- 调用运行时库函数omp_set_num_threads()
- 使用num_threads子句,实际上是一种静态模式
- 使用环境变量OMP_NUM_THREADS,它实际上也是一种静态模式。
在这几种方法中,比较常用的模式是静态模式和动态模式,嵌套模式比较复杂,普通的编程人员一般不会涉及。
默认模式
所谓默认模式,就是在程序中对并行计算的线程数量不作显式声明。此方法的优越性在于程序的扩展性好。
静态模式
/* File:snt.cpp */
#include<stdio.h>
#include<omp.h>
/*
 * Prints the team size of a parallel region twice: first without any
 * explicit thread-count setting, then after omp_set_num_threads(3).
 */
int main(){
    int team_size, thread_id;

    /* First region: no thread count has been requested (default mode). */
    #pragma omp parallel private(thread_id,team_size)
    {
        thread_id = omp_get_thread_num();
        team_size = omp_get_num_threads();
        printf("number of threads=(default)%d ",team_size);
        printf("tid=%d\n",thread_id);
    }
    printf("------before OMP_SET_NUM_THREADS\n");
    printf("\n");

    /* Set the number of threads for the regions that follow. */
    const int requested = 3;
    omp_set_num_threads(requested);
    printf("set_number_threads=%d\n",requested);

    /* Second region: runs after the thread count has been set. */
    #pragma omp parallel private(thread_id,team_size)
    {
        thread_id = omp_get_thread_num();
        team_size = omp_get_num_threads();
        printf("number of threads(default)=%d ",team_size);
        printf("tid=%d\n",thread_id);
        printf("------------------------\n");
    }
    return 0;
}
//运行结果
$ ./snt.out
number of threads=(default)8 tid=0
number of threads=(default)8 tid=7
number of threads=(default)8 tid=6
number of threads=(default)8 tid=1
number of threads=(default)8 tid=4
number of threads=(default)8 tid=3
number of threads=(default)8 tid=2
number of threads=(default)8 tid=5
------before OMP_SET_NUM_THREADS
set_number_threads=3
number of threads(default)=3 tid=1
------------------------
number of threads(default)=3 tid=0
------------------------
number of threads(default)=3 tid=2
------------------------
动态模式
/* File:sd.cpp */
#include<stdio.h>
#include<omp.h>
/*
 * Enables dynamic adjustment with omp_set_dynamic(1), requests three
 * threads, then prints the team size each thread observes inside a
 * parallel region.
 */
int main(){
    const int requested = 3;
    int team_size, thread_id;

    /* The call above and the call below this comment are used as a pair. */
    omp_set_dynamic(1);
    omp_set_num_threads(requested);

    printf("set_number_threads=%d\n",requested);
    printf("dynamic region(1 or 0):%d",omp_get_dynamic());
    printf("\n");

    #pragma omp parallel private(thread_id,team_size)
    {
        thread_id = omp_get_thread_num();
        team_size = omp_get_num_threads();
        printf("number of threads=%d ",team_size);
        printf("tid=%d\n",thread_id);
        printf("------------------------\n");
    }
    return 0;
}
//运行结果
$ ./sd.out
set_number_threads=3
dynamic region(1 or 0):1
number of threads=2 tid=0
------------------------
number of threads=2 tid=1
------------------------
嵌套模式与num_threads子句
// sn.cpp
#include<stdio.h>
#include<omp.h>
/*
 * Nested-parallelism demo: an outer team of two threads, each of which
 * opens an inner parallel region whose size it requested beforehand
 * through omp_set_num_threads().
 */
int main(){
    omp_set_nested(1);
    omp_set_dynamic(0);
    printf("nested region(1 or 0):%d\n",omp_get_nested());
    printf("\n");

    /* Start two threads for the outer region. */
    #pragma omp parallel num_threads(2)
    {
        /* The master thread (id 0) asks for 4 threads, any other thread for 3. */
        omp_set_num_threads(omp_get_thread_num() == 0 ? 4 : 3);

        /* Only the master thread prints the outer header line. */
        #pragma omp master
        {
            const int level = omp_get_active_level();
            printf("* * * * outer zone:active_level=%d, team_size=%d\n",
                   level, omp_get_team_size(level));
        }
        /* Every outer thread reports itself. */
        printf("outer:thread_ID=%d,thread_in_team:%d\n",
               omp_get_thread_num(), omp_get_num_threads());

        /* Inner region: no num_threads clause is given here. */
        #pragma omp parallel
        {
            /* Only the master thread of each inner team prints the header. */
            #pragma omp master
            {
                const int level = omp_get_active_level();
                printf("-------inner zone:active_level=%d, team_size=%d\n",
                       level, omp_get_team_size(level));
            }
            /* Every inner thread reports itself. */
            printf("inner:thread_ID=%d,threads_in_team=%d\n",
                   omp_get_thread_num(),omp_get_num_threads());
        }
    }
    return 0;
}
//运行结果
$ ./sn.out
nested region(1 or 0):1
* * * * outer zone:active_level=1, team_size=2
outer:thread_ID=0,thread_in_team:2
outer:thread_ID=1,thread_in_team:2
-------inner zone:active_level=2, team_size=4
inner:thread_ID=0,threads_in_team=4
inner:thread_ID=3,threads_in_team=4
inner:thread_ID=1,threads_in_team=4
inner:thread_ID=2,threads_in_team=4
inner:thread_ID=2,threads_in_team=3
-------inner zone:active_level=2, team_size=3
inner:thread_ID=0,threads_in_team=3
inner:thread_ID=1,threads_in_team=3
/* nc.cpp */
#include<stdio.h>
#include<omp.h>
#define m 5
#define n 4
/*
 * Nested demo built from sections: two outer sections run under a
 * three-thread "parallel sections" construct, and each section fills its
 * own array inside an inner "parallel for" (three threads for the first
 * array, two for the second), printing the active level and team size at
 * every step.
 */
int main(){
    int data_a[m],data_b[n];

    omp_set_nested(1);
    omp_set_dynamic(0);

    #pragma omp parallel sections shared(data_a,data_b) num_threads(3)
    {
        /* Section 1: fills data_a with 0..m-1. */
        #pragma omp section
        {
            const int outer_level = omp_get_active_level();
            printf("* * * * *outer section 1:active_level=%d,team_size=%d",
                   outer_level, omp_get_team_size(outer_level));
            printf(" id=%d, threads_in_team=%d\n\n",
                   omp_get_thread_num(), omp_get_num_threads());

            #pragma omp parallel for shared(data_a) num_threads(3)
            for(int idx=0;idx<m;++idx){
                data_a[idx]=idx;
                const int inner_level = omp_get_active_level();
                printf("-----inner section 1:i=%d active_level=%d,team_size=%d",idx,
                       inner_level, omp_get_team_size(inner_level));
                printf(" id=%d,threads_in_team=%d \n",
                       omp_get_thread_num(), omp_get_num_threads());
            }
        }

        /* Section 2: fills data_b with 10..10+n-1. */
        #pragma omp section
        {
            const int outer_level = omp_get_active_level();
            printf("* * * * * outer section 2:active_level=%d,team_size=%d",
                   outer_level, omp_get_team_size(outer_level));
            printf(" id=%d,threads_in_team=%d\n",
                   omp_get_thread_num(), omp_get_num_threads());

            #pragma omp parallel for shared(data_b) num_threads(2)
            for(int idx=0;idx<n;++idx){
                data_b[idx]=idx+10;
                const int inner_level = omp_get_active_level();
                printf("----inner section2:j=%d active_level=%d,team_size=%d",idx,
                       inner_level, omp_get_team_size(inner_level));
                printf(" id=%d,threads_in_tean=%d \n",
                       omp_get_thread_num(), omp_get_num_threads());
            }
        }
    }
    return 0;
}
//运行结果
$ ./nc.out
* * * * *outer section 1:active_level=1,team_size=3 id=0, threads_in_team=3
* * * * * outer section 2:active_level=1,team_size=3 id=1,threads_in_team=3
-----inner section 1:i=0 active_level=2,team_size=3
-----inner section 1:i=4 active_level=2,team_size=3 id=2,threads_in_team=3
----inner section2:j=0 active_level=2,team_size=2 id=0,threads_in_tean=2
----inner section2:j=1 active_level=2,team_size=2 id=0,threads_in_tean=2
-----inner section 1:i=2 active_level=2,team_size=3 id=1,threads_in_team=3
-----inner section 1:i=3 active_level=2,team_size=3 id=1,threads_in_team=3
id=0,threads_in_team=3
-----inner section 1:i=1 active_level=2,team_size=3 id=0,threads_in_team=3
----inner section2:j=2 active_level=2,team_size=2 id=1,threads_in_tean=2
----inner section2:j=3 active_level=2,team_size=2 id=1,threads_in_tean=2
条件并行子句if
/* ipp.cpp */
#include<stdio.h>
#include<omp.h>
/*
 * Opens a parallel region guarded by if(n>10) with num_threads(4) and
 * has every thread of the resulting team print the team size together
 * with n.
 *
 * n: value tested by the if clause and echoed in each output line.
 */
void printnumthreads(int n){
    #pragma omp parallel num_threads(4) if(n>10)
    {
        /* Declared inside the region, so each thread has its own copy. */
        const int team_size = omp_get_num_threads();
        printf("number of threads=%d, n=%d\n",team_size,n);
    }
}
/*
 * Calls printnumthreads() with one value below and one value above the
 * threshold used by its if clause, separating the two runs with a blank
 * line.
 */
int main(){
    const int inputs[] = {2, 20};
    const int input_count = (int)(sizeof inputs / sizeof inputs[0]);

    for(int k = 0; k < input_count; ++k){
        /* A blank line goes between consecutive runs, not before the first. */
        if(k > 0){
            printf("\n");
        }
        printnumthreads(inputs[k]);
    }
    return 0;
}
//运行结果
$ ./ipp.out
number of threads=1, n=2
number of threads=4, n=20
number of threads=4, n=20
number of threads=4, n=20
number of threads=4, n=20