验证darknet中前处理做图像缩放（双线性内插值法）scale的算法效果

置顶

papaofdoudou

已于 2023-04-15 06:41:40 修改

阅读量1.6k

点赞数 8

分类专栏：嵌入式系统数学文章标签：算法 python 开发语言

于 2022-03-17 17:37:22 首次发布

本文链接：https://blog.csdn.net/tugouxp/article/details/123551480

版权

关于图形scale原理的介绍可以参考下面的文章

用最近邻插值(Nearest Neighbor interpolation)进行图片缩放_papaofdoudou的博客-CSDN博客_最近邻插值算法原理：图片缩放的两种常见算法：最近邻域内插法(Nearest Neighbor interpolation)、双线性内插法(bilinear interpolation)。本文主要讲述最近邻插值(Nearest Neighbor interpolation)算法的原理以及python实现。基本原理：最简单的图像缩放算法就是最近邻插值。顾名思义，就是将目标图像各点的像素值设为源图像中与其最近的点。算法优点在于简单、速度快。如下图所示，一个4*4的图片缩放为8*8的图片。步骤： ... https://blog.csdn.net/tugouxp/article/details/120110219

关于darknet前处理的流程，尤其是图形SCALE部分，可以参考下面的文章：

Darknet框架解读_papaofdoudou的博客-CSDN博客：1.test_detector的调用路径：这里重点介绍load_alphabet函数：根据代码，这里是从data/labels/目录装载png图片，图片共有8*95=760张，这些图片都是ASCII码字符，比如我们打开编号为65和97的PNG图片，看到的内容分别是A和a。怀疑最后绘框上面的Label字母就是来源于这里，这样就不依赖于系统字库了，我们确认一下，用反证法。如果我们将所有b的图片替换成a的图片，那么bicycle应该能变成aicycle，我们测试一下：我们.. https://blog.csdn.net/tugouxp/article/details/120941276?spm=1001.2014.3001.5502

DARKNET中使用的缩放算法是双线性内插值法，这里就实际验证一把DARKNET中scale的工作原理与效果：

首先这是一张原图，画面中的是南京明城墙玄武门，玄武湖的正门。18年国庆带娃回学校的时候留念，一段令人怀念的岁月。

图像参数如下,可以看到是YUV420格式的，尺寸为1920*1080。

首先我们将其转换为RAW YUV444P 格式，目的是为了方便做SCALE，转换工具就用FFMPEG

ffmpeg -i 165823915.jpg -pix_fmt yuv444p xuanwumen.yuv444p.yuv

转换后的RAW YUV图像，可以通过ffplay命令查看，只是由于这个时候没有PARSER信息，需要手动输入图像的分辨率：

ffplay -pix_fmt yuv444p -f rawvideo -video_size 1920x1080 ./xuanwumen.yuv444p.yuv

至此，万事俱备，只欠代码了。

我们目的是要对YUV图作SCALE，由于转换后的YUV图像是YUV444P平面（planar）格式，Y、U、V三个分量各自占用一个与原图同尺寸的完整平面并依次存放，我们需要对Y平面、U平面和V平面分别作SCALE。

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <stddef.h>
#include <stdint.h>
#include <fcntl.h>
#include <unistd.h>
#include <assert.h>
#include <sys/ioctl.h>

/*
 * Debug trace helper: prints the name of the calling function and the source
 * line number, followed by the printf-style message and a trailing newline.
 * `fmt` has to be a string literal because it is pasted between two other
 * literals; `##__VA_ARGS__` (a GNU extension) removes the preceding comma
 * when no variadic arguments are supplied.
 */
#define DBG(fmt, ...)   do { printf("%s line %d, "fmt"\n", __func__, __LINE__, ##__VA_ARGS__); } while (0)

/*
 * 8-bit image descriptor, passed by value between the helpers in this file.
 * Samples are addressed as data[c*h*w + y*w + x] (see set_pixel/get_pixel),
 * i.e. one complete w*h plane per channel, stored one after another.
 */
typedef struct image {
    int w;                  /* width of a row, in samples */
    int h;                  /* number of rows */
    int c;                  /* number of channels (planes) */
    unsigned char *data;    /* sample storage; 0 when built by make_empty_image() */
} image;

/*
 * Hex-dump `len` bytes starting at `buf` to stdout, 16 bytes per row, each
 * row prefixed with the address of its first byte.
 *
 * buf: first byte to dump; must be readable for `len` bytes.
 * len: number of bytes to dump; zero or negative prints only the header and
 *      the trailing line break.
 */
static void dump_memory(uint8_t *buf, int32_t len)
{
    int32_t i;

    printf("\n\rdump file memory:");
    for (i = 0; i < len; i++)
    {
        if ((i % 16) == 0)
        {
            /* %p is only defined for pointers to void, so convert explicitly. */
            printf("\n\r%p: ", (void *)(buf + i));
        }
        /* %x expects an unsigned int; the promoted uint8_t would be an int. */
        printf("0x%02x ", (unsigned int)buf[i]);
    }

    printf("\n\r");
}

image make_empty_image(int w, int h, int c)
{
    image out;             
    out.data = 0;
    out.h = h;                                                                                                                                                                                              
    out.w = w;
    out.c = c;
    return out;
}

image copy_image(image p)
{
	image copy = p;
    copy.data = (unsigned char*)calloc(p.h * p.w * p.c, sizeof(float));
    memcpy(copy.data, p.data, p.h*p.w*p.c*sizeof(float));
    return copy;
}

/*
 * Create an image of the given dimensions backed by a zero-filled buffer of
 * h*w*c bytes obtained from calloc().
 *
 * The result of calloc() is stored unchecked, so `data` is a null pointer if
 * the allocation failed. The caller releases the buffer with free_image().
 */
image make_image(int w, int h, int c)
{
    image img = make_empty_image(w, h, c);
    unsigned char *pixels = (unsigned char *)calloc(h * w * c, sizeof(char));

    img.data = pixels;
    return img;
}

/*
 * Store `val` at column x, row y of channel c in image m.
 *
 * Coordinates outside the image are ignored silently. The float value is
 * converted to unsigned char by plain assignment, which discards the
 * fractional part.
 */
static void set_pixel(image m, int x, int y, int c, float val)
{
    int inside = (x >= 0 && x < m.w)
              && (y >= 0 && y < m.h)
              && (c >= 0 && c < m.c);
    int offset;

    if (!inside) {
        return;
    }

    /* Past the guard the upper-bound assertion can never fire, so it is omitted. */
    offset = c*m.h*m.w + y*m.w + x;
    m.data[offset] = val;
}

/*
 * Read the sample at column x, row y of channel c in image m and return it
 * as a float.
 *
 * Only the upper bounds are asserted; the caller has to pass non-negative
 * coordinates.
 */
static float get_pixel(image m, int x, int y, int c)
{
    int offset;

    assert(x < m.w && y < m.h && c < m.c);
    offset = c*m.h*m.w + y*m.w + x;
    return (float)m.data[offset];
}

/*
 * Release the sample buffer of `m`. An image without a buffer is accepted.
 * `m` is passed by value, so the caller's copy of the pointer is left
 * unchanged and must not be used afterwards.
 */
void free_image(image m)
{
    /* free() ignores a null pointer, so no separate check is needed. */
    free(m.data);
}

/*
 * Add `val` to the sample at column x, row y of channel c in image m.
 *
 * The sum is computed in floating point and converted back to unsigned char
 * on store. Only the upper bounds are asserted; the caller has to pass
 * non-negative coordinates.
 */
static void add_pixel(image m, int x, int y, int c, float val)
{
    int offset;

    assert(x < m.w && y < m.h && c < m.c);
    offset = c*m.h*m.w + y*m.w + x;
    m.data[offset] += val;
}

/*
 * Resize `im` to w x h samples per channel with separable bilinear
 * interpolation (horizontal pass into a temporary image, then a vertical
 * pass), mapping the corner samples of the source onto the corner samples of
 * the result.
 *
 * im:   source image; not modified.
 * w, h: requested size of the result.
 *
 * Returns a newly allocated image that the caller releases with
 * free_image(). When the size is unchanged the result is copy_image(im).
 * The returned image has data == NULL if an argument is unusable (empty
 * source, non-positive target size) or an allocation fails.
 *
 * Details that matter for correctness:
 *  - A target dimension of 1 has no step between samples; the scale is then
 *    taken as 0 instead of dividing by zero, and the last source column/row
 *    is used, which is what the w == 1 case always did.
 *  - The last output column/row reads the last source column/row directly,
 *    and source indices are clamped, so float rounding of c*w_scale or
 *    r*h_scale can neither index one past the end nor zero out the border.
 *  - Each pass blends in float and stores once, adding 0.5 first because
 *    set_pixel() drops the fractional part; this rounds to nearest instead
 *    of truncating twice.
 */
image resize_image(image im, int w, int h)
{
    if (w <= 0 || h <= 0 || im.w <= 0 || im.h <= 0 || im.c <= 0 || im.data == NULL) {
        return make_empty_image(w, h, im.c);
    }
    if (im.w == w && im.h == h) return copy_image(im);

    image resized = make_image(w, h, im.c);
    image part = make_image(w, im.h, im.c);
    if (resized.data == NULL || part.data == NULL) {
        free_image(part);
        free_image(resized);
        return make_empty_image(w, h, im.c);
    }

    int r, c, k;
    const float w_scale = (w > 1) ? (float)(im.w - 1) / (float)(w - 1) : 0.0f;
    const float h_scale = (h > 1) ? (float)(im.h - 1) / (float)(h - 1) : 0.0f;

    /* Pass 1: interpolate along x; `part` is w wide and still im.h tall. */
    for(k = 0; k < im.c; ++k){
        for(r = 0; r < im.h; ++r){
            for(c = 0; c < w; ++c){
                float sx = c*w_scale;
                int ix = (int) sx;
                float val;
                if(c == w-1 || ix >= im.w-1){
                    val = get_pixel(im, im.w-1, r, k);
                } else {
                    float dx = sx - ix;
                    val = (1 - dx) * get_pixel(im, ix, r, k) + dx * get_pixel(im, ix+1, r, k);
                }
                set_pixel(part, c, r, k, val + 0.5f);
            }
        }
    }

    /* Pass 2: interpolate along y from `part` into the final image. */
    for(k = 0; k < im.c; ++k){
        for(r = 0; r < h; ++r){
            float sy = r*h_scale;
            int iy = (int) sy;
            float dy = sy - iy;
            int last_row = (r == h-1 || iy >= im.h-1);
            if(last_row) iy = im.h-1;
            for(c = 0; c < w; ++c){
                float val = get_pixel(part, c, iy, k);
                if(!last_row){
                    val = (1-dy) * val + dy * get_pixel(part, c, iy+1, k);
                }
                set_pixel(resized, c, r, k, val + 0.5f);
            }
        }
    }

    free_image(part);
    return resized;
}

enum {
	DEFAULT_SRC_WIDTH  = 1920,  /* source size used when none is given */
	DEFAULT_SRC_HEIGHT = 1080,
	MAX_EDGE_LEN       = 32768, /* keeps w*h inside int for the int index math of the image helpers */
	PLANE_COUNT        = 3,     /* Y, U and V planes of a yuv444p frame */
	DUMP_BYTES         = 32     /* bytes shown from the head and tail of the input */
};

/*
 * Parse `text` as a decimal edge length in [1, MAX_EDGE_LEN].
 * Returns 0 and stores the value in *value on success, -1 otherwise
 * (empty string, trailing garbage, or out of range).
 */
static int parse_edge_len(const char *text, int *value)
{
	char *end = NULL;
	long parsed = strtol(text, &end, 10);

	if (end == text || *end != '\0' || parsed < 1 || parsed > MAX_EDGE_LEN)
	{
		return -1;
	}
	*value = (int)parsed;
	return 0;
}

/*
 * Scale one raw yuv444p frame and write the result to ./output.yuv.
 *
 * Usage: program xxxx.yuv width height [src_width src_height]
 *   xxxx.yuv              planar input: a full-size Y plane, then U, then V
 *   width height          size of every output plane
 *   src_width src_height  size of every input plane (default 1920 1080)
 *
 * Each plane is resized on its own with resize_image() and appended to the
 * output file in Y, U, V order. Returns 0 on success and -1 on any error;
 * every resource acquired up to that point is released before returning.
 */
int main(int argc, char **argv)
{
	FILE *file = NULL;
	FILE *outfile = NULL;
	unsigned char *p = NULL;
	int width = 0, height = 0;
	int src_width = DEFAULT_SRC_WIDTH, src_height = DEFAULT_SRC_HEIGHT;
	long filelen = 0;
	size_t plane_size = 0;
	size_t out_size = 0;
	int32_t dump_len = 0;
	int plane;
	int rc = -1;

	DBG("in");

	if(argc != 4 && argc != 6)
	{
		DBG("input error, you should use this program like that: program xxxx.yuv width height [src_width src_height].");
		return rc;
	}

	if(parse_edge_len(argv[2], &width) != 0 || parse_edge_len(argv[3], &height) != 0)
	{
		DBG("input error, width and height must be integers in [1, %d].", MAX_EDGE_LEN);
		return rc;
	}

	if(argc == 6 &&
	   (parse_edge_len(argv[4], &src_width) != 0 || parse_edge_len(argv[5], &src_height) != 0))
	{
		DBG("input error, src_width and src_height must be integers in [1, %d].", MAX_EDGE_LEN);
		return rc;
	}

	DBG("scale to width %d, height %d.", width, height);

	file = fopen(argv[1], "rb");
	if(file == NULL)
	{
		DBG("fatal error, open file %s failure, please check the file status.", argv[1]);
		goto cleanup;
	}

	if(fseek(file, 0, SEEK_END) != 0 || (filelen = ftell(file)) < 0)
	{
		DBG("fatal error, can not determine the size of file %s.", argv[1]);
		goto cleanup;
	}

	DBG("file %s len %ld bytes.", argv[1], filelen);

	/* The three planes are sliced out of the buffer below, so they must all be present. */
	plane_size = (size_t)src_width * (size_t)src_height;
	if((size_t)filelen < PLANE_COUNT * plane_size)
	{
		DBG("file %s is too short for a %dx%d yuv444p frame.", argv[1], src_width, src_height);
		goto cleanup;
	}

	p = malloc((size_t)filelen);
	if(p == NULL)
	{
		DBG("malloc buffer failure for %s len %ld.", argv[1], filelen);
		goto cleanup;
	}

	if(fseek(file, 0, SEEK_SET) != 0 || fread(p, 1, (size_t)filelen, file) != (size_t)filelen)
	{
		DBG("read file failure, size wrong.");
		goto cleanup;
	}

	fclose(file);
	file = NULL;

	/* Never dump more than the file holds, so the tail dump stays inside the buffer. */
	dump_len = (filelen < DUMP_BYTES) ? (int32_t)filelen : DUMP_BYTES;
	dump_memory(p, dump_len);
	dump_memory(p + filelen - dump_len, dump_len);

	outfile = fopen("./output.yuv", "wb+");
	if(outfile == NULL)
	{
		DBG("fatal error, open output file failure, please check the file status.");
		goto cleanup;
	}

	out_size = (size_t)width * (size_t)height;
	for(plane = 0; plane < PLANE_COUNT; ++plane)
	{
		/* View into the input buffer; the memory stays owned by `p`. */
		image src = make_empty_image(src_width, src_height, 1);
		image scaled;
		size_t written;

		src.data = p + (size_t)plane * plane_size;

		scaled = resize_image(src, width, height);
		if(scaled.data == NULL)
		{
			DBG("scale plane %d failure.", plane);
			goto cleanup;
		}

		written = fwrite(scaled.data, 1, out_size, outfile);
		free_image(scaled);
		if(written != out_size)
		{
			DBG("write file failure, size wrong.");
			goto cleanup;
		}
	}

	if(fflush(outfile) != 0)
	{
		DBG("flush output file failure.");
		goto cleanup;
	}
	/* Pushing the data to the device is best effort, as before; only report a failure. */
	if(fsync(fileno(outfile)) != 0)
	{
		DBG("fsync output file failure.");
	}

	rc = 0;

cleanup:
	if(outfile != NULL && fclose(outfile) != 0)
	{
		DBG("close output file failure.");
		rc = -1;
	}
	if(file != NULL)
	{
		fclose(file);
	}
	free(p);

	return rc;
}

执行如下命令：

$gcc main.c
$ffmpeg -i 165823915.jpg -pix_fmt yuv444p xuanwumen.yuv444p.yuv
$./a.out xuanwumen.yuv444p.yuv 352 352
$ffplay -pix_fmt yuv444p -f rawvideo -video_size 352x352 ./output.yuv

SCALE 352X352后输出结果：

最低0.47元/天解锁文章

papaofdoudou

关注

8
点赞
踩
2

收藏

觉得还不错? 一键收藏
打赏
0
评论
验证darknet中前处理做图像缩放（双线性内插值法）scale的算法效果

关于图形scale原理的介绍可以参考下面的文章用最近邻插值(Nearest Neighbor interpolation)进行图片缩放_papaofdoudou的博客-CSDN博客_最近邻插值算法原理个图片缩放的两种常见算法：最近邻域内插法(Nearest Neighbor interpolation) 双向性内插法(bilinear interpolation)本文主要讲述最近邻插值(Nearest Neighbor interpolation算法的原理以及python实现基本原理最简单的图像缩放算
复制链接

扫一扫