test1:
compiler directive on the outer loop:
#include <stdio.h>
#include <omp.h>
/*
 * Benchmark variant with the OpenMP directive on the OUTER loop.
 *
 * A single parallel region is created; the 300 iterations of the outer
 * loop are divided among the threads and each thread runs the complete
 * 200-iteration inner loop for every outer iteration it is given.
 * Each inner iteration prints i, j and the number of the executing thread.
 *
 * Returns 0.
 */
int main(void)
{
    int i;

    /* `i` is the loop variable of the parallel for, so it is private. */
    #pragma omp parallel for
    for (i = 0; i < 300; i++) {
        /* Declared inside the parallel loop body: one copy per thread,
         * so no private() clause is required. */
        int j;

        for (j = 0; j < 200; j++) {
            int tid = omp_get_thread_num();

            printf("i is %d, j = %d, tid = %d\n", i, j, tid);
        }
    }
    return 0;
}
results:
real 0m3.153s
user 0m0.827s
sys 0m1.033s
test2:
compiler directive on the inner loop:
#include <stdio.h>
#include <omp.h>
/*
 * Benchmark variant with the OpenMP directive on the INNER loop.
 *
 * The outer loop runs sequentially; on each of its 300 iterations a new
 * parallel region is entered and the 200 iterations of the inner loop are
 * divided among the threads. Each inner iteration prints i, j and the
 * number of the executing thread.
 *
 * Returns 0.
 */
int main(void)
{
    int i, j;

    for (i = 0; i < 300; i++) {
        /* `i` is shared and only read inside the region; `j` is the loop
         * variable of the parallel for, so it is private. */
        #pragma omp parallel for
        for (j = 0; j < 200; j++) {
            /* Declared inside the parallel loop body: one copy per thread. */
            int tid = omp_get_thread_num();

            printf("i is %d, j = %d, tid = %d\n", i, j, tid);
        }
    }
    return 0;
}
results:
real 0m4.313s
user 0m10.508s
sys 0m0.496s
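Conclusion: when the loop count is large, parallelizing the outer loop is faster than parallelizing the inner loop, because the inner-loop version has to start and join a parallel region on every one of the 300 outer iterations.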