【opencv】——parallel_for的使用例子(附代码)

opencv, parallel_for

测试对比纯for循环和parallel_for的计算耗时。
code

#include <stdlib.h>

#include <cstdint>
#include <cstring>
#include <iostream>
#include <mutex>
#include <vector>

#include <opencv2/core.hpp>
 
class ParallelAdd : public cv::ParallelLoopBody
{
public:
	ParallelAdd(int* src1, int* src2, int* out, int blc, int size):src1(src1), src2(src2), out(out), blc(blc), size(size)
	{	}
 
	void operator()(const cv::Range& range) const
	{
		for (int i = 4; i < 17; i++)
		{
			int64 sum = 0;
			for (int j = range.start; j < range.end; j++)
			{
				int p1 = src1[j];
				int p2 = src2[j];
				int t = (p1 - blc) * i + blc;
				t = t >= 0 ? t : 0;
				t = t <= 4095 ? t : 4095;		// clip
				t = t - p2;
				t = t >=0 ? t : - t;					// abs
				sum +=t;
			}
			out[i-4] = (int32_t)sum / size;
		}
	}
 
private:
	int* src1;
	int* src2;
	int* out;
	int blc;
	int size;
};
 

 void test_for(int* src1, int* src2, int* out, int blc, int size)
{
	for (int i = 4; i < 17; i++)
	{
		int64 sum = 0;
		int64 sum1 = 0;
		int64 sum2 = 0;
		int64 sum3 = 0;
		for (int j = 0; j < size / 4; j++)
		{
			// np.mean(np.abs(((short - blc) * i + blc).clip(0, 4095) - long))
				int p1 = src1[4 * j + 0];
				int p2 = src2[4 * j + 0]  ;
				int t = (p1 - blc) * i + blc;
				t = t >= 0 ? t : 0;
				t = t <= 4095 ? t : 4095;		// clip
				t = t - p2;
				t = t >=0 ? t : - t;					// abs
				sum +=t;

				p1 = src1[4 * j + 1];
				p2 = src2[4 * j + 1];
				t = (p1 - blc) * i + blc;
				t = t >= 0 ? t : 0;
				t = t <= 4095 ? t : 4095;		// clip
				t = t - p2;
				t = t >=0 ? t : - t;					// abs
				sum1 +=t;

				p1 = src1[4 * j + 2];
				p2 = src2[4 * j + 2];
				t = (p1 - blc) * i + blc;
				t = t >= 0 ? t : 0;
				t = t <= 4095 ? t : 4095;		// clip
				t = t - p2;
				t = t >=0 ? t : - t;					// abs
				sum2 +=t;

				p1 = src1[4 * j + 3];
				p2 = src2[4 * j + 3];
				t = (p1 - blc) * i + blc;
				t = t >= 0 ? t : 0;
				t = t <= 4095 ? t : 4095;		// clip
				t = t - p2;
				t = t >=0 ? t : - t;					// abs
				sum3 +=t;
		}
		out[i-4] = (int32_t)(sum + sum1 + sum2 + sum3)/ size;
	}
	
}
 
int main()
{
    int height = 1920;
    int width = 1080;
	int blc = 23;
	int out_dim = 13;

	int bytes = height * width * sizeof(int);
	int size = height * width;
	int *src1 = (int*)malloc(bytes);
	memset(src1, 1, bytes);

	int *src2 = (int*) malloc(bytes);
	memset(src2, 2, bytes);

	int *src3 = (int*) malloc(out_dim * sizeof(int));
 
	int64 t1 = cv::getTickCount();

	//直接for循环
	test_for(src1, src2, src3, blc, size);
	int64 t2 = cv::getTickCount();
	std::cout << "time: " << (t2 - t1)/cv::getTickFrequency() * 1000 << " ms" << std::endl;
 
	cv::parallel_for_(cv::Range(0, size), ParallelAdd(src1, src2, src3, blc, size));//隐式调用,并发
	int64 t3 = cv::getTickCount();
	std::cout << "parallel time: " << (t3 - t2) / cv::getTickFrequency() * 1000 << " ms" << std::endl;

 
	return 0;
}
 
/*
time: 86.7311 ms
parallel time: 19.0727 ms
*/

耗时情况如下:
/*
time: 86.7311 ms
parallel time: 19.0727 ms
*/

根据上次的测试,在数据量比较小的时候,parallel_for的计算耗时可以做到和cuda差不多。

  • 0
    点赞
  • 2
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值