A tiny program that benchmarks two image-transpose algorithms (a naive loop and an 8x8 blocked variant) and validates the blocked one

Here is the code:

#include <stdio.h>
#include <xmmintrin.h>
#include <windows.h>

// Image element type: each element handled by the transpose routines below
// is one 128-bit SSE value (__m128).
typedef __m128 Vec;

// Integer type used to carry raw performance-counter tick values (see now()).
typedef unsigned long long value_t;

// Samples the Windows high-resolution performance counter and returns the
// reading as a raw tick count. The value is only meaningful relative to
// another reading; scale a difference by the counter frequency
// (QueryPerformanceFrequency) to obtain wall-clock time.
__forceinline value_t now()
{
    LARGE_INTEGER counter;
    QueryPerformanceCounter(&counter);

    const value_t ticks = counter.QuadPart;
    return ticks;
}

// Straightforward out-of-place transpose of a row-major image of Vec elements.
//
//   dst_img  destination buffer, src_w * src_h elements; filled as src_w rows
//            of src_h elements each
//   src_img  source buffer, read as src_h rows of src_w elements each
//   src_w    source width  (elements per source row)
//   src_h    source height (number of source rows)
//
// Source element (row i, column j) is copied to destination (row j, column i).
// The result is only a transpose when the two buffers do not overlap.
// Nothing is written when either dimension is <= 0.
//
// When OpenMP is enabled the outer loop is divided among threads; every
// iteration writes its own destination row, so iterations do not write to the
// same element.
inline void img_transpose(
    Vec *dst_img, 
    Vec *src_img, 
    const int src_w, 
    const int src_h)
{
    // Element offsets are formed in size_t. The previous int arithmetic
    // (j * src_h + i) overflowed, which is undefined behaviour, as soon as the
    // image held more than INT_MAX elements.
    const size_t width = (size_t)src_w;
    const size_t height = (size_t)src_h;

    // The loop counters stay plain int so the loop keeps the form that older
    // OpenMP implementations accept for "parallel for".
#pragma omp parallel for
    for (int j = 0; j < src_w; ++j)
    {
        // Start of destination row j (== source column j).
        Vec *dst_row = dst_img + (size_t)j * height;

        for (int i = 0; i < src_h; ++i)
        {
            dst_row[i] = src_img[(size_t)i * width + (size_t)j];
        }
    }
}

// Out-of-place transpose of a row-major image of Vec elements, processed in
// square tiles so that the reads and writes of one tile stay close together
// in memory.
//
//   dst_img  destination buffer, src_w * src_h elements; filled as src_w rows
//            of src_h elements each
//   src_img  source buffer, read as src_h rows of src_w elements each
//   src_w    source width  (elements per source row)
//   src_h    source height (number of source rows)
//
// Source element (row m, column n) is copied to destination (row n, column m).
// Dimensions need not be multiples of the tile size: edge tiles are clamped.
// The result is only a transpose when the two buffers do not overlap.
// Nothing is written when either dimension is <= 0.
//
// When OpenMP is enabled the outer (column-tile) loop is divided among
// threads; each column tile maps to its own band of destination rows.
inline void img_transpose_block(
    Vec *dst_img, 
    Vec *src_img, 
    const int src_w, 
    const int src_h)
{
    // Tile edge length, in elements.
    const int block = 8;

    // Element offsets are formed in size_t so that images with more than
    // INT_MAX elements do not overflow int arithmetic.
    const size_t width = (size_t)src_w;
    const size_t height = (size_t)src_h;

#pragma omp parallel for
    for (int j = 0; j < src_w; j += block)
    {
        // Exclusive end column of this tile, clamped to the image width.
        // Written without min() so the code does not depend on the macro that
        // <windows.h> only provides when NOMINMAX is not defined, and hoisted
        // out of the row-tile loop because it does not depend on i.
        const int col_end = (src_w - j < block) ? src_w : j + block;

        for (int i = 0; i < src_h; i += block)
        {
            // Exclusive end row of this tile, clamped to the image height.
            const int row_end = (src_h - i < block) ? src_h : i + block;

            for (int n = j; n < col_end; ++n)
            {
                // Start of destination row n (== source column n).
                Vec *dst_row = dst_img + (size_t)n * height;

                for (int m = i; m < row_end; ++m)
                {
                    dst_row[m] = src_img[(size_t)m * width + (size_t)n];
                }
            }
        }
    }
}

int main(int argc, char *argv[])
{
    //// performance benchmark ////

    const int w = 1280;
    const int h = 720;
    Vec *a = new Vec [w * h];
    Vec *b = new Vec [w * h];
    value_t start_time, end_time;


    LARGE_INTEGER freq;
    QueryPerformanceFrequency(&freq);
    double ms_per_tick = 1000.0 / (double)freq.QuadPart;



    start_time = now();

    for (int t = 0; t < 50; ++t)
    {
        img_transpose(b, a, w, h);
        img_transpose(a, b, h, w);
    }

    end_time = now();
    printf("img_transpose:          %f ms\n", (double)(end_time - start_time) * ms_per_tick);



    start_time = now();

    for (int t = 0; t < 50; ++t)
    {
        img_transpose_block(b, a, w, h);
        img_transpose_block(a, b, h, w);
    }

    end_time = now();
    printf("img_transpose_block:   %f ms\n", (double)(end_time - start_time) * ms_per_tick);


    delete [] a;
    delete [] b;


    //// algorithm validation ////
    const int width = 1080;
    const int height = 1920;
    Vec *src_img = new Vec [width * height];
    Vec *dst_img = new Vec [height * width];

    for (int j = 0; j < height; ++j)
    {
        for (int i = 0; i < width; ++i)
        {
            src_img[j * width + i].m128_i32[0] = i;
            src_img[j * width + i].m128_i32[1] = j;
        }
    }

    img_transpose_block(dst_img, src_img, width, height);

    for (int j = 0; j < width; ++j)
    {
        for (int i = 0; i < height; ++i)
        {
            int pi = dst_img[j * height + i].m128_i32[0];
            int pj = dst_img[j * height + i].m128_i32[1];

            if (pi != j || pj != i)
            {
                printf("Algorithm is wrong!!!\n");
                goto END_OF_PROGRAM;
            }
        }
    }

END_OF_PROGRAM:
    printf("All done\n");


    return 0;
}

 

posted on 2017-10-22 21:00 Len3d 阅读( ...) 评论( ...) 编辑 收藏

转载于:https://www.cnblogs.com/len3d/p/7711639.html

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值