使用OpenMP加快OpenCV图像处理性能 | speed up opencv image processing with openmp

本文介绍了如何使用OpenMP提升OpenCV图像处理的性能。通过配置CMakeLists.txt,利用OpenMP的并行for循环、private、reduction等指令优化代码。文章详细讲解了OpenMP的语法和特性,并提供了示例,包括不同调度类型的效果对比,以及OpenCV与OpenMP结合的实践应用。
摘要由CSDN通过智能技术生成

本文首发于个人博客https://kezunlin.me/post/7a6ba82e/,欢迎阅读!

speed up opencv image processing with openmp

Series

Guide

config

  • Linux/Windows: cmake with CXX_FLAGS=-fopenmp
  • Windows (Visual Studio): VS also supports OpenMP; enable it under C/C++ | Language | OpenMP Support (/openmp)

usage

#include <omp.h>

#pragma omp parallel for
    for loop ...

code

#include <cstdio>
#include <iostream>
#include <omp.h>

// Demo entry point: runs an 8-iteration loop on a team of 4 OpenMP threads
// and prints which thread executed each iteration.
//
// Returns 0 unconditionally.
int main()
{
    // Request 4 threads for the parallel region below.
    omp_set_num_threads(4);

    // The iterations are shared out among the threads, so the printed lines
    // may appear in any order (see the sample output in the article).
#pragma omp parallel for
    for (int i = 0; i < 8; i++)
    {
        std::printf("i = %d, I am Thread %d\n", i, omp_get_thread_num());
    }
    std::printf("\n");

    return 0;
}

/*
i = 0, I am Thread 0
i = 1, I am Thread 0
i = 4, I am Thread 2
i = 5, I am Thread 2
i = 6, I am Thread 3
i = 7, I am Thread 3
i = 2, I am Thread 1
i = 3, I am Thread 1
*/

CMakeLists.txt

use CXX_FLAGS=-fopenmp in CMakeLists.txt

# Build script for the "hello" OpenMP demo executable.
#
# The imported target OpenMP::OpenMP_CXX used below was introduced in
# CMake 3.9; 3.10 is chosen as a conservative floor. The upper bound opts in
# to newer policy behaviour without rejecting older CMake releases.
cmake_minimum_required(VERSION 3.10...3.29)

# hello.cpp is the only source, so only the C++ toolchain is enabled.
project(hello LANGUAGES CXX)

# REQUIRED aborts configuration when OpenMP is unavailable, so no separate
# "found" check is needed afterwards.
find_package(OpenMP REQUIRED)

# Diagnostic only: show the compiler flag FindOpenMP selected
# (for example -fopenmp with GCC).
message(STATUS "[main] OpenMP_CXX_FLAGS=${OpenMP_CXX_FLAGS}")

add_executable(hello hello.cpp)

# Attach the OpenMP compile flags and runtime library to this target only,
# instead of appending to the global CMAKE_C_FLAGS / CMAKE_CXX_FLAGS /
# CMAKE_EXE_LINKER_FLAGS strings.
target_link_libraries(hello PRIVATE OpenMP::OpenMP_CXX)

optionsopenmp

or use g++ hello.cpp -fopenmp to compile

view demo

list dynamic dependencies (ldd)

    ldd hello 
        linux-vdso.so.1 =>  (0x00007ffd71365000)
        libstdc++.so.6 => /usr/lib/x86_64-linux-gnu/libstdc++.so.6 (0x00007f8ea7f00000)
        libgomp.so.1 => /usr/lib/x86_64-linux-gnu/libgomp.so.1 (0x00007f8ea7cde000)
        libc.so.6 => /lib/x86_64-linux-gnu/libc.so.6 (0x00007f8ea7914000)
        libm.so.6 => /lib/x86_64-linux-gnu/libm.so.6 (0x00007f8ea760b000)
        /lib64/ld-linux-x86-64.so.2 (0x00007f8ea8282000)
        libgcc_s.so.1 => /lib/x86_64-linux-gnu/libgcc_s.so.1 (0x00007f8ea73f5000)
        libdl.so.2 => /lib/x86_64-linux-gnu/libdl.so.2 (0x00007f8ea71f1000)
        libpthread.so.0 => /lib/x86_64-linux-gnu/libpthread.so.0 (0x00007f8ea6fd4000)

libgomp.so.1 => /usr/lib/x86_64-linux-gnu/libgomp.so.1

list names (nm)

    nm hello 
    0000000000602080 B __bss_start
    0000000000602190 b completed.7594
                     U __cxa_atexit@@GLIBC_2.2.5
    0000000000602070 D __data_start
    0000000000602070 W data_start
    0000000000400b00 t deregister_tm_clones
    0000000000400b80 t __do_global_dtors_aux
    0000000000601df8 t __do_global_dtors_aux_fini_array_entry
    0000000000602078 d __dso_handle
    0000000000601e08 d _DYNAMIC
    0000000000602080 D _edata
    0000000000602198 B _end
    0000000000400d44 T _fini
    0000000000400ba0 t frame_dummy
    0000000000601de8 t __frame_dummy_init_array_entry
    0000000000400f18 r __FRAME_END__
    0000000000602000 d _GLOBAL_OFFSET_TABLE_
    0000000000400c28 t _GLOBAL__sub_I_main
                     w __gmon_start__
    0000000000400d54 r __GNU_EH_FRAME_HDR
                     U GOMP_parallel@@GOMP_4.0
                     U __gxx_personality_v0@@CXXABI_1.3
    00000000004009e0 T _init
    0000000000601df8 t __init_array_end
    0000000000601de8 t __init_array_start
    0000000000400d50 R _IO_stdin_used
                     w _ITM_deregisterTMCloneTable
                     w _ITM_registerTMCloneTable
    0000000000601e00 d __JCR_END__
    0000000000601e00 d __JCR_LIST__
                     w _Jv_RegisterClasses
    0000000000400d40 T __libc_csu_fini
    0000000000400cd0 T __libc_csu_init
                     U __libc_start_main@@GLIBC_2.2.5
    0000000000400bc6 T main
    0000000000400c3d t main._omp_fn.0
                     U omp_get_num_threads@@OMP_1.0
                     U omp_get_thread_num@@OMP_1.0
    0000000000400b40 t register_tm_clones
    0000000000400ad0 T _start
    0000000000602080 d __TMC_END__
    0000000000400bea t _Z41__static_initialization_and_destruction_0ii
                     U _ZNSolsEPFRSoS_E@@GLIBCXX_3.4
                     U _ZNSt8ios_base4InitC1Ev@@GLIBCXX_3.4
                     U _ZNSt8ios_base4InitD1Ev@@GLIBCXX_3.4
    0000000000602080 B _ZSt4cout@@GLIBCXX_3.4
                     U _ZSt4endlIcSt11char_traitsIcEERSt13basic_ostreamIT_T0_ES6_@@GLIBCXX_3.4
    0000000000602191 b _ZStL8__ioinit
                     U _ZStlsISt11char_traitsIcEERSt13basic_ostreamIcT_ES5_c@@GLIBCXX_3.4

omp_get_num_threads, omp_get_thread_num

OpenMP Introduction

OpenMP的指令格式

    #pragma omp directive [clause[clause]…]
    #pragma omp parallel private(i, j)

parallel is directive, private is clause

directive

  • parallel,用在一个代码段之前,表示这段代码将被多个线程并行执行
  • for,用于for循环之前,将循环分配到多个线程中并行执行,必须保证每次循环之间无相关性。
  • parallel for, parallel 和 for语句的结合,也是用在一个for循环之前,表示for循环的代码将被多个线程并行执行。
  • sections,用在可能会被并行执行的代码段之前
  • parallel sections,parallel和sections两个语句的结合
  • critical,用在一段代码临界区之前
  • single,用在一段只被单个线程执行的代码段之前,表示后面的代码段将被单线程执行。
  • flush,用于保证各线程看到的共享变量的内存视图一致
  • barrier,用于并行区内代码的线程同步,所有线程执行到barrier时要停止,直到所有线程都执行到barrier时才继续往下执行。
  • atomic,用于指定一块内存区域被原子地更新
  • master,用于指定一段代码块由主线程执行
  • ordered, 用于指定并行区域的循环按顺序执行
  • threadprivate, 用于指定一个变量是线程私有的。

parallel for

OpenMP 对可以多线程化的循环有如下五个要求:

  • 循环变量(就是i)必须是有符号整型,其他的都不行。
  • 循环的比较条件必须是< <= > >=中的一种
  • 循环的增量部分必须是增减一个不变的值(即每次循环是不变的)。
  • 如果比较符号是< <=,那每次循环i应该增加,反之应该减小
  • 循环必须是没有奇奇怪怪的东西,不能从内部循环跳到外部循环,goto和break只能在循环内部跳转,异常必须在循环内部被捕获。

如果你的循环不符合这些条件,那就只好改写了.

avoid race condition

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值