Darknet预处理数据增强中翻转(flip)注意事项

AlexeyAB提出了yolov4网络并对Darknet进行了诸多改进,本文是基于由AlexeyAB改进的Darknet进行分析。

众所周知,翻转(flip)是数据增强中最常用的方法,Darknet也原生支持这个操作,在cfg文件中添加flip=1即可,但是其实在cfg文件中即使你没有添加该句,Darknet也会默认flip,除非你在cfg文件中添加flip=0。

【源码】

https://github.com/AlexeyAB/darknet/blob/master/src/parser.c  1120行parse_net_options函数是解析cfg文件中训练参数信息的。从net->flip = option_find_int_quiet(options, "flip", 1);该行代码可以看出flip默认值是1的。

void parse_net_options(list *options, network *net)
{
    net->max_batches = option_find_int(options, "max_batches", 0);
    net->batch = option_find_int(options, "batch",1);
    net->learning_rate = option_find_float(options, "learning_rate", .001);
    net->learning_rate_min = option_find_float_quiet(options, "learning_rate_min", .00001);
    net->batches_per_cycle = option_find_int_quiet(options, "sgdr_cycle", net->max_batches);
    net->batches_cycle_mult = option_find_int_quiet(options, "sgdr_mult", 2);
    net->momentum = option_find_float(options, "momentum", .9);
    net->decay = option_find_float(options, "decay", .0001);
    int subdivs = option_find_int(options, "subdivisions",1);
    net->time_steps = option_find_int_quiet(options, "time_steps",1);
    net->track = option_find_int_quiet(options, "track", 0);
    net->augment_speed = option_find_int_quiet(options, "augment_speed", 2);
    net->init_sequential_subdivisions = net->sequential_subdivisions = option_find_int_quiet(options, "sequential_subdivisions", subdivs);
    if (net->sequential_subdivisions > subdivs) net->init_sequential_subdivisions = net->sequential_subdivisions = subdivs;
    net->try_fix_nan = option_find_int_quiet(options, "try_fix_nan", 0);
    net->batch /= subdivs;          // mini_batch
    const int mini_batch = net->batch;
    net->batch *= net->time_steps;  // mini_batch * time_steps
    net->subdivisions = subdivs;    // number of mini_batches

    *net->seen = 0;
    *net->cur_iteration = 0;
    net->loss_scale = option_find_float_quiet(options, "loss_scale", 1);
    net->dynamic_minibatch = option_find_int_quiet(options, "dynamic_minibatch", 0);
    net->optimized_memory = option_find_int_quiet(options, "optimized_memory", 0);
    net->workspace_size_limit = (size_t)1024*1024 * option_find_float_quiet(options, "workspace_size_limit_MB", 1024);  // 1024 MB by default

    net->adam = option_find_int_quiet(options, "adam", 0);
    if(net->adam){
        net->B1 = option_find_float(options, "B1", .9);
        net->B2 = option_find_float(options, "B2", .999);
        net->eps = option_find_float(options, "eps", .000001);
    }

    net->h = option_find_int_quiet(options, "height",0);
    net->w = option_find_int_quiet(options, "width",0);
    net->c = option_find_int_quiet(options, "channels",0);
    net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c);
    net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2);
    net->min_crop = option_find_int_quiet(options, "min_crop",net->w);
    net->flip = option_find_int_quiet(options, "flip", 1);
    net->blur = option_find_int_quiet(options, "blur", 0);
    net->gaussian_noise = option_find_int_quiet(options, "gaussian_noise", 0);
    net->mixup = option_find_int_quiet(options, "mixup", 0);
    int cutmix = option_find_int_quiet(options, "cutmix", 0);
    int mosaic = option_find_int_quiet(options, "mosaic", 0);
    if (mosaic && cutmix) net->mixup = 4;
    else if (cutmix) net->mixup = 2;
    else if (mosaic) net->mixup = 3;
    net->letter_box = option_find_int_quiet(options, "letter_box", 0);
    net->mosaic_bound = option_find_int_quiet(options, "mosaic_bound", 0);
    net->contrastive = option_find_int_quiet(options, "contrastive", 0);
    net->contrastive_jit_flip = option_find_int_quiet(options, "contrastive_jit_flip", 0);
    net->unsupervised = option_find_int_quiet(options, "unsupervised", 0);
    if (net->contrastive && mini_batch < 2) {
        printf(" Error: mini_batch size (batch/subdivisions) should be higher than 1 for Contrastive loss \n");
        exit(0);
    }
    net->label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f);
    net->resize_step = option_find_float_quiet(options, "resize_step", 32);
    net->attention = option_find_int_quiet(options, "attention", 0);
    net->adversarial_lr = option_find_float_quiet(options, "adversarial_lr", 0);
    net->max_chart_loss = option_find_float_quiet(options, "max_chart_loss", 20.0);

    net->angle = option_find_float_quiet(options, "angle", 0);
    net->aspect = option_find_float_quiet(options, "aspect", 1);
    net->saturation = option_find_float_quiet(options, "saturation", 1);
    net->exposure = option_find_float_quiet(options, "exposure", 1);
    net->hue = option_find_float_quiet(options, "hue", 0);
    net->power = option_find_float_quiet(options, "power", 4);

    if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied");

    char *policy_s = option_find_str(options, "policy", "constant");
    net->policy = get_policy(policy_s);
    net->burn_in = option_find_int_quiet(options, "burn_in", 0);
#ifdef GPU
    if (net->gpu_index >= 0) {
        char device_name[1024];
        int compute_capability = get_gpu_compute_capability(net->gpu_index, device_name);
#ifdef CUDNN_HALF
        if (compute_capability >= 700) net->cudnn_half = 1;
        else net->cudnn_half = 0;
#endif// CUDNN_HALF
        fprintf(stderr, " %d : compute_capability = %d, cudnn_half = %d, GPU: %s \n", net->gpu_index, compute_capability, net->cudnn_half, device_name);
    }
    else fprintf(stderr, " GPU isn't used \n");
#endif// GPU
    if(net->policy == STEP){
        net->step = option_find_int(options, "step", 1);
        net->scale = option_find_float(options, "scale", 1);
    } else if (net->policy == STEPS || net->policy == SGDR){
        char *l = option_find(options, "steps");
        char *p = option_find(options, "scales");
        char *s = option_find(options, "seq_scales");
        if(net->policy == STEPS && (!l || !p)) error("STEPS policy must have steps and scales in cfg file");

        if (l) {
            int len = strlen(l);
            int n = 1;
            int i;
            for (i = 0; i < len; ++i) {
                if (l[i] == '#') break;
                if (l[i] == ',') ++n;
            }
            int* steps = (int*)xcalloc(n, sizeof(int));
            float* scales = (float*)xcalloc(n, sizeof(float));
            float* seq_scales = (float*)xcalloc(n, sizeof(float));
            for (i = 0; i < n; ++i) {
                float scale = 1.0;
                if (p) {
                    scale = atof(p);
                    p = strchr(p, ',') + 1;
                }
                float sequence_scale = 1.0;
                if (s) {
                    sequence_scale = atof(s);
                    s = strchr(s, ',') + 1;
                }
                int step = atoi(l);
                l = strchr(l, ',') + 1;
                steps[i] = step;
                scales[i] = scale;
                seq_scales[i] = sequence_scale;
            }
            net->scales = scales;
            net->steps = steps;
            net->seq_scales = seq_scales;
            net->num_steps = n;
        }
    } else if (net->policy == EXP){
        net->gamma = option_find_float(options, "gamma", 1);
    } else if (net->policy == SIG){
        net->gamma = option_find_float(options, "gamma", 1);
        net->step = option_find_int(options, "step", 1);
    } else if (net->policy == POLY || net->policy == RANDOM){
        //net->power = option_find_float(options, "power", 1);
    }

}

https://github.com/AlexeyAB/darknet/blob/master/src/data.c   159行matrix load_image_augment_paths函数是对输入图像进行数据增强。其中flip_image(crop);是对图像进行翻转。

matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int dontuse_opencv, int contrastive)
{
    int i;
    matrix X;
    X.rows = n;
    X.vals = (float**)xcalloc(X.rows, sizeof(float*));
    X.cols = 0;

    for(i = 0; i < n; ++i){
        int size = w > h ? w : h;
        image im;
        const int img_index = (contrastive) ? (i / 2) : i;
        if(dontuse_opencv) im = load_image_stb_resize(paths[img_index], 0, 0, 3);
        else im = load_image_color(paths[img_index], 0, 0);

        image crop = random_augment_image(im, angle, aspect, min, max, size);
        int flip = use_flip ? random_gen() % 2 : 0;
        if (flip)
            flip_image(crop);
        random_distort_image(crop, hue, saturation, exposure);

        image sized = resize_image(crop, w, h);

        //show_image(im, "orig");
        //show_image(sized, "sized");
        //show_image(sized, paths[img_index]);
        //wait_until_press_key_cv();
        //printf("w = %d, h = %d \n", sized.w, sized.h);

        free_image(im);
        free_image(crop);
        X.vals[i] = sized.data;
        X.cols = sized.h*sized.w*sized.c;
    }
    return X;
}

https://github.com/AlexeyAB/darknet/blob/master/src/image.c  563行void flip_image(image a)是对图像翻转的函数。从for(j = 0; j < a.w/2; ++j)可以看出只对图像进行了水平翻转。此外,当你的训练数据是遥感图像或者显微镜下的图像,可以在准备数据集时对训练集中的图像进行垂直翻转和旋转一定角度,这都能起到增加样本的作用。

void flip_image(image a)
{
    int i,j,k;
    for(k = 0; k < a.c; ++k){
        for(i = 0; i < a.h; ++i){
            for(j = 0; j < a.w/2; ++j){
                int index = j + a.w*(i + a.h*(k));
                int flip = (a.w - j - 1) + a.w*(i + a.h*(k));
                float swap = a.data[flip];
                a.data[flip] = a.data[index];
                a.data[index] = swap;
            }
        }
    }
}

 

  • 0
    点赞
  • 10
    收藏
    觉得还不错? 一键收藏
  • 1
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值