OpenMP支持的编程语言包括C、C++和Fortran;简单地说,OpenMP就是一套用来编写多线程应用程序的API。通过使用简单的指令#pragma omp …就可以对程序进行多线程并行。OpenMP使得程序员可以把更多的精力投入到并行算法本身,而非其具体实现细节。对基于数据分集的多线程程序设计,它是一个很好的选择。但是,作为高层抽象,OpenMP并不适合需要复杂的线程间同步和互斥的场合。OpenMP的另一个缺点是不能在非共享内存系统(如计算机集群)上使用;在这样的系统上,通常使用MPI较多。
用 OpenMP 编写的程序在运行时采用 fork-join 并行执行模式。程序开始是以一个单进程运行,称为执行的主线程。主线程顺序运行到第 1 个并行块结构时就生成一个线程队,原来的主线程成为线程队的主线程。程序中被并行块包围起来的所有语句(包括块内被调用的子程序)在线程队中并行执行,一直到并行块执行完后,线程队中的线程中止,而主线程继续执行。一个程序中可以定义任意数目的并行块,因此,在一个程序的执行中可以分叉、合并若干次。
使用方法
主要是使用以下指令:
parallel :用在一个结构块之前,表示这段代码将被多个线程并行执行;
for:用于for循环语句之前,表示将循环计算任务分配到多个线程中并行执行,以实现任务分担,必须由编程人员自己保证每次循环之间无数据相关性;
parallel for :parallel和for指令的结合,也是用在for循环语句之前,表示for循环体的代码将被多个线程并行执行,它同时具有并行域的产生和任务分担两个功能;
sections :用在可被并行执行的代码段之前,用于实现多个结构块语句的任务分担,可并行执行的代码段各自用section指令标出(注意区分sections和section);
parallel sections:parallel和sections两个语句的结合,类似于parallel for;
single:用在并行域内,表示一段只被单个线程执行的代码;
critical:用在一段代码临界区之前,保证每次只有一个OpenMP线程进入;
flush:保证各个OpenMP线程的数据影像的一致性;
barrier:用于并行域内代码的线程同步,线程执行到barrier时要停下等待,直到所有线程都执行到barrier时才继续往下执行;
atomic:用于指定一个数据操作需要原子性地完成;
master:用于指定一段代码由主线程执行;
threadprivate:用于指定一个或多个变量是线程专用,后面会解释线程专有和私有的区别。
例子:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/time.h>
#include <iostream>
#include <omp.h>
using namespace std;
void test1(){
    /* Demo: fork a 2-thread team; each thread does some busy work and then
     * prints its own id.
     *
     * Fixes over the original:
     *  - `tid` was a single function-scope variable written by every thread
     *    inside the parallel region (a data race); it is now declared inside
     *    the region, so each thread gets a private copy.
     *  - unused `nthreads` removed.
     *  - the two byte-identical branches (one per thread id) are collapsed;
     *    the real id goes through the printf format string.
     */
    omp_set_num_threads(2);
    /* Fork a team of threads giving them their own copies of variables */
    #pragma omp parallel
    {
        const int tid = omp_get_thread_num();  /* private: one copy per thread */
        if (tid == 0 || tid == 1)
        {
            /* Busy work; note x / y is integer division (always 10, widened to 10.0). */
            for (int i = 0; i < 100000; i++) {
                int x = 1000, y = 100;
                double num = x / y;
                (void)num;  /* silence unused-variable warnings */
            }
            printf("Hello World from thread = %d\n", tid);
        }
    } /* All threads join the master thread and the team disbands */
    return;
}
void test2()
{
    /* Demo: let OpenMP split a 2-iteration loop across the team.
     *
     * Fixes over the original: the two branch bodies were byte-identical
     * except for the hard-coded id in the printf; they are collapsed into one
     * guarded body that prints the executing thread's real id. The guard
     * `tid == 0 || tid == 1` preserves the original behavior of printing
     * nothing on any thread with a higher id. */
    #pragma omp parallel for
    for (int k = 0; k < 2; k++)
    {
        const int tid = omp_get_thread_num();
        if (tid == 0 || tid == 1)
        {
            /* Busy work identical to test1; x / y is integer division. */
            for (int i = 0; i < 100000; i++) {
                int x = 1000, y = 100;
                double num = x / y;
                (void)num;  /* silence unused-variable warnings */
            }
            printf("Hello World from thread = %d\n", tid);
        }
    }
    return;
}
void test() {
    // Sequential baseline: the same busy work (100000 integer divisions)
    // that the parallel examples perform, used for timing comparison.
    for (int iter = 0; iter < 100000; ++iter) {
        int dividend = 1000, divisor = 100;
        double quotient = dividend / divisor;  // integer division, then widened
        (void)quotient;
    }
}
int main() {
    // Report how many logical processors OpenMP sees.
    cout << "CPU number:" << omp_get_num_procs() << endl;

    // Time the explicitly-forked 2-thread version.
    const double par_begin = omp_get_wtime();
    test1();
    const double par_end = omp_get_wtime();
    cout << "Multi-thread Time is: " << (par_end - par_begin) * 1000 << endl;

    // Time the equivalent sequential work: the same busy loop, run twice
    // back to back on a single thread.
    const double seq_begin = omp_get_wtime();
    for (int pass = 0; pass < 2; ++pass) {
        for (int i = 0; i < 100000; i++) {
            int x = 1000, y = 100;
            double num = x / y;
            (void)num;
        }
    }
    const double seq_end = omp_get_wtime();
    cout << "Single Time is: " << (seq_end - seq_begin) * 1000 << endl;

    // Time the "parallel for" version.
    const double pf_begin = omp_get_wtime();
    test2();
    const double pf_end = omp_get_wtime();
    cout << "thread is: " << (pf_end - pf_begin) * 1000 << endl;
    return 0;
}
cmake:
# 3.9 is required for the OpenMP::OpenMP_CXX imported target used below.
cmake_minimum_required(VERSION 3.9)
project(exe LANGUAGES CXX)

# Default to an optimized build; switch to "Debug" for symbols.
set(CMAKE_BUILD_TYPE "Release")
#set(CMAKE_BUILD_TYPE "Debug")
set(CMAKE_CXX_FLAGS_DEBUG "$ENV{CXXFLAGS} -O0 -Wall -g2 -ggdb ${CMAKE_CXX_FLAGS}")
set(CMAKE_CXX_FLAGS_RELEASE "$ENV{CXXFLAGS} -O3 -Wall ${CMAKE_CXX_FLAGS}")
set(CMAKE_RUNTIME_OUTPUT_DIRECTORY "${CMAKE_SOURCE_DIR}/out")

# OpenMP: link the imported target (see target_link_libraries below) instead
# of hand-appending flag strings. The original appended
# "-lstdc++ -pthread -fopenmp" to CMAKE_*_FLAGS and, because of a missing
# space in SET(CMAKE_EXE_LINKER_FLAGS"..."), never actually set the linker
# flags at all. OpenMP::OpenMP_CXX carries the correct compile AND link
# requirements for the active compiler.
find_package(OpenMP REQUIRED)
message(STATUS "OpenMP found")

add_definitions(-D__ARM_NEON)  # NOTE(review): prefer target_compile_definitions

if(USE_CVITEK1838)
  # Cross-build settings for the CVITEK 1838 SoC (Cortex-A53).
  add_definitions("-O3 -fpermissive -fPIC -mcpu=cortex-a53 -fno-aggressive-loop-optimizations -Wno-narrowing -ffunction-sections -fdata-sections -fstack-protector -DUSER_BIT_64 -D_FILE_OFFSET_BITS=64 -D_LARGEFILE_SOURCE -D_LARGEFILE64_SOURCE -DENABLE_NEON") #-DUSE_HI3559
  include_directories(
    #${SDK_DIR}/include
    ${THIRD_PARTY_LIB_DIR}/opencv-3.2_cvitek/include
    ../src/cvitek
    ../src
    ../inc
    ${SYS_SDK_DIR}/middleware/include
    ${SYS_SDK_DIR}/middleware/include/ive
    ${SYS_SDK_DIR}/middleware/include/isp/cv182x
    #${IVE_DIR}/include/ive
    ${TPU_DIR}/include/cvimath
  )
  link_directories(
    ${SYS_SDK_DIR}/middleware/lib64
    ${SYS_SDK_DIR}/middleware/lib64/3rd
    ${THIRD_PARTY_LIB_DIR}/opencv-3.2_cvitek/lib
  )
  set(IVE_LIBS2
    ${SYS_SDK_DIR}/middleware/lib64/libcvi_ive_tpu.so
  )
endif()

add_executable(test_openmp test_openmp.cpp)
# PRIVATE: these libraries are implementation details of the executable.
target_link_libraries(test_openmp PRIVATE
  OpenMP::OpenMP_CXX
  cvimath cviruntime cvikernel cvitracer) #vpu sys
一些问题
1. 运行时报错:找不到 OpenMP 运行库
加这个
# Make OpenMP a hard requirement and link the imported target (CMake >= 3.9).
# The original advice appended flags by hand, and the missing space in
# SET(CMAKE_EXE_LINKER_FLAGS"...") meant the linker flags were never set.
# OpenMP::OpenMP_CXX carries both the compile flag (-fopenmp) and the link
# requirements, so no manual "-lstdc++ -pthread -fopenmp" strings are needed.
find_package(OpenMP REQUIRED)
message(STATUS "OpenMP found")
target_link_libraries(test_openmp PRIVATE OpenMP::OpenMP_CXX)
或者编译时候直接
g++ test_openmp.cpp -o test -fopenmp
或
gcc test_openmp.cpp -o test -fopenmp -lstdc++
把所需的 so 库复制到运行目录;这里使用的是从晶视的 docker 镜像里提取出来的库。
2.运行效果,在pc上和在晶视芯片上,运行效率都低于单线程,目前网上发现同样问题的情况不少