AlexeyAB提出了YOLOv4网络,并对Darknet进行了诸多改进。本文基于AlexeyAB改进后的Darknet进行分析。
众所周知,翻转(flip)是数据增强中最常用的方法之一。Darknet原生支持该操作:在cfg文件中添加flip=1即可启用。实际上,即使cfg文件中没有写这一项,Darknet也会默认开启翻转;只有在cfg文件中显式添加flip=0,才会关闭翻转。
【源码】
①https://github.com/AlexeyAB/darknet/blob/master/src/parser.c 第1120行的parse_net_options函数用于解析cfg文件中的训练参数。从net->flip = option_find_int_quiet(options, "flip", 1);这一行代码可以看出,flip的默认值为1。
/*
 * Fill the network-level fields of `net` from the key/value pairs in
 * `options` (batching, optimizer, input geometry, data augmentation and
 * learning-rate policy).  Each lookup passes the value used when the key
 * is absent as its last argument; e.g. "flip" falls back to 1.
 *
 * options: parsed option list to read from.
 * net:     network whose fields are written in place.  `net->seen` and
 *          `net->cur_iteration` are dereferenced and reset to 0, so both
 *          must already point to valid storage.
 *
 * Terminates the process when contrastive loss is requested with a
 * mini-batch smaller than 2, and calls error() when no input size is
 * given or when the STEPS policy lacks "steps"/"scales".
 */
void parse_net_options(list *options, network *net)
{
    net->max_batches = option_find_int(options, "max_batches", 0);
    net->batch = option_find_int(options, "batch",1);
    net->learning_rate = option_find_float(options, "learning_rate", .001);
    net->learning_rate_min = option_find_float_quiet(options, "learning_rate_min", .00001);
    net->batches_per_cycle = option_find_int_quiet(options, "sgdr_cycle", net->max_batches);
    net->batches_cycle_mult = option_find_int_quiet(options, "sgdr_mult", 2);
    net->momentum = option_find_float(options, "momentum", .9);
    net->decay = option_find_float(options, "decay", .0001);
    int subdivs = option_find_int(options, "subdivisions",1);
    net->time_steps = option_find_int_quiet(options, "time_steps",1);
    net->track = option_find_int_quiet(options, "track", 0);
    net->augment_speed = option_find_int_quiet(options, "augment_speed", 2);
    net->init_sequential_subdivisions = net->sequential_subdivisions = option_find_int_quiet(options, "sequential_subdivisions", subdivs);
    // sequential_subdivisions is clamped so it never exceeds subdivisions.
    if (net->sequential_subdivisions > subdivs) net->init_sequential_subdivisions = net->sequential_subdivisions = subdivs;
    net->try_fix_nan = option_find_int_quiet(options, "try_fix_nan", 0);
    net->batch /= subdivs;                  // mini_batch
    const int mini_batch = net->batch;
    net->batch *= net->time_steps;          // mini_batch * time_steps
    net->subdivisions = subdivs;            // number of mini_batches
    *net->seen = 0;
    *net->cur_iteration = 0;
    net->loss_scale = option_find_float_quiet(options, "loss_scale", 1);
    net->dynamic_minibatch = option_find_int_quiet(options, "dynamic_minibatch", 0);
    net->optimized_memory = option_find_int_quiet(options, "optimized_memory", 0);
    net->workspace_size_limit = (size_t)1024*1024 * option_find_float_quiet(options, "workspace_size_limit_MB", 1024);  // 1024 MB by default

    net->adam = option_find_int_quiet(options, "adam", 0);
    if(net->adam){
        net->B1 = option_find_float(options, "B1", .9);
        net->B2 = option_find_float(options, "B2", .999);
        net->eps = option_find_float(options, "eps", .000001);
    }

    net->h = option_find_int_quiet(options, "height",0);
    net->w = option_find_int_quiet(options, "width",0);
    net->c = option_find_int_quiet(options, "channels",0);
    net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c);
    net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2);
    net->min_crop = option_find_int_quiet(options, "min_crop",net->w);
    // Flip is enabled unless the cfg explicitly sets flip=0.
    net->flip = option_find_int_quiet(options, "flip", 1);
    net->blur = option_find_int_quiet(options, "blur", 0);
    net->gaussian_noise = option_find_int_quiet(options, "gaussian_noise", 0);
    net->mixup = option_find_int_quiet(options, "mixup", 0);
    int cutmix = option_find_int_quiet(options, "cutmix", 0);
    int mosaic = option_find_int_quiet(options, "mosaic", 0);
    // cutmix/mosaic override the "mixup" value: 2 = cutmix, 3 = mosaic, 4 = both.
    if (mosaic && cutmix) net->mixup = 4;
    else if (cutmix) net->mixup = 2;
    else if (mosaic) net->mixup = 3;
    net->letter_box = option_find_int_quiet(options, "letter_box", 0);
    net->mosaic_bound = option_find_int_quiet(options, "mosaic_bound", 0);
    net->contrastive = option_find_int_quiet(options, "contrastive", 0);
    net->contrastive_jit_flip = option_find_int_quiet(options, "contrastive_jit_flip", 0);
    net->unsupervised = option_find_int_quiet(options, "unsupervised", 0);
    if (net->contrastive && mini_batch < 2) {
        printf(" Error: mini_batch size (batch/subdivisions) should be higher than 1 for Contrastive loss \n");
        exit(0);
    }
    net->label_smooth_eps = option_find_float_quiet(options, "label_smooth_eps", 0.0f);
    net->resize_step = option_find_float_quiet(options, "resize_step", 32);
    net->attention = option_find_int_quiet(options, "attention", 0);
    net->adversarial_lr = option_find_float_quiet(options, "adversarial_lr", 0);
    net->max_chart_loss = option_find_float_quiet(options, "max_chart_loss", 20.0);

    net->angle = option_find_float_quiet(options, "angle", 0);
    net->aspect = option_find_float_quiet(options, "aspect", 1);
    net->saturation = option_find_float_quiet(options, "saturation", 1);
    net->exposure = option_find_float_quiet(options, "exposure", 1);
    net->hue = option_find_float_quiet(options, "hue", 0);
    net->power = option_find_float_quiet(options, "power", 4);

    // Either "inputs" or all of height/width/channels must be non-zero.
    if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied");

    char *policy_s = option_find_str(options, "policy", "constant");
    net->policy = get_policy(policy_s);
    net->burn_in = option_find_int_quiet(options, "burn_in", 0);
#ifdef GPU
    if (net->gpu_index >= 0) {
        char device_name[1024];
        int compute_capability = get_gpu_compute_capability(net->gpu_index, device_name);
#ifdef CUDNN_HALF
        if (compute_capability >= 700) net->cudnn_half = 1;
        else net->cudnn_half = 0;
#endif// CUDNN_HALF
        fprintf(stderr, " %d : compute_capability = %d, cudnn_half = %d, GPU: %s \n", net->gpu_index, compute_capability, net->cudnn_half, device_name);
    }
    else fprintf(stderr, " GPU isn't used \n");
#endif// GPU
    if(net->policy == STEP){
        net->step = option_find_int(options, "step", 1);
        net->scale = option_find_float(options, "scale", 1);
    } else if (net->policy == STEPS || net->policy == SGDR){
        char *l = option_find(options, "steps");
        char *p = option_find(options, "scales");
        char *s = option_find(options, "seq_scales");
        if(net->policy == STEPS && (!l || !p)) error("STEPS policy must have steps and scales in cfg file");

        if (l) {
            // Count the comma-separated entries in "steps"; a '#' ends the list.
            int len = strlen(l);
            int n = 1;
            int i;
            for (i = 0; i < len; ++i) {
                if (l[i] == '#') break;
                if (l[i] == ',') ++n;
            }
            int* steps = (int*)xcalloc(n, sizeof(int));
            float* scales = (float*)xcalloc(n, sizeof(float));
            float* seq_scales = (float*)xcalloc(n, sizeof(float));
            for (i = 0; i < n; ++i) {
                // Each cursor advances past the next comma.  When a list runs
                // out of commas the cursor becomes NULL instead of the invalid
                // "NULL + 1", so a "scales"/"seq_scales" list shorter than
                // "steps" leaves the remaining entries at the 1.0 default.
                float scale = 1.0;
                if (p) {
                    scale = atof(p);
                    p = strchr(p, ',');
                    if (p) ++p;
                }
                float sequence_scale = 1.0;
                if (s) {
                    sequence_scale = atof(s);
                    s = strchr(s, ',');
                    if (s) ++s;
                }
                int step = l ? atoi(l) : 0;
                if (l) {
                    l = strchr(l, ',');
                    if (l) ++l;
                }
                steps[i] = step;
                scales[i] = scale;
                seq_scales[i] = sequence_scale;
            }
            net->scales = scales;
            net->steps = steps;
            net->seq_scales = seq_scales;
            net->num_steps = n;
        }
    } else if (net->policy == EXP){
        net->gamma = option_find_float(options, "gamma", 1);
    } else if (net->policy == SIG){
        net->gamma = option_find_float(options, "gamma", 1);
        net->step = option_find_int(options, "step", 1);
    } else if (net->policy == POLY || net->policy == RANDOM){
        // "power" has already been read above for these policies.
    }
}
②https://github.com/AlexeyAB/darknet/blob/master/src/data.c 第159行的load_image_augment_paths函数用于对输入图像进行数据增强。其中flip_image(crop);负责对图像进行翻转:只有当use_flip非零时才可能翻转,并且每张图像以50%的概率(random_gen() % 2)被翻转。
/*
 * Load n images and return them as the rows of a matrix after augmentation.
 *
 * For every row the source image is loaded (through the stb loader when
 * dontuse_opencv is set, otherwise through load_image_color), passed to
 * random_augment_image, optionally flipped, passed to random_distort_image
 * and finally resized to w x h.  When `contrastive` is non-zero, rows 2k and
 * 2k+1 are both produced from paths[k].
 *
 * A flip is only attempted when use_flip is non-zero, and then it is applied
 * when random_gen() returns an odd value.
 *
 * The returned matrix stores the pixel buffer of each resized image in
 * vals[i]; cols is the element count (h * w * c) of the last resized image,
 * or 0 when n is 0.
 */
matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int w, int h, float angle, float aspect, float hue, float saturation, float exposure, int dontuse_opencv, int contrastive)
{
    matrix X;
    X.rows = n;
    X.vals = (float**)xcalloc(X.rows, sizeof(float*));
    X.cols = 0;

    // The longer side of the target size is handed to random_augment_image.
    const int size = (w > h) ? w : h;

    int row;
    for (row = 0; row < n; ++row) {
        // In contrastive mode two consecutive rows share one source image.
        int img_index = row;
        if (contrastive) img_index = row / 2;

        image original;
        if (dontuse_opencv) {
            original = load_image_stb_resize(paths[img_index], 0, 0, 3);
        } else {
            original = load_image_color(paths[img_index], 0, 0);
        }

        image augmented = random_augment_image(original, angle, aspect, min, max, size);

        // random_gen() is consulted only when flipping is enabled.
        if (use_flip && (random_gen() % 2)) {
            flip_image(augmented);
        }

        random_distort_image(augmented, hue, saturation, exposure);
        image sized = resize_image(augmented, w, h);

        free_image(original);
        free_image(augmented);

        // The matrix keeps sized.data; it is not freed here.
        X.vals[row] = sized.data;
        X.cols = sized.h * sized.w * sized.c;
    }

    return X;
}
③https://github.com/AlexeyAB/darknet/blob/master/src/image.c 第563行的void flip_image(image a)是实现图像翻转的函数。从for(j = 0; j < a.w/2; ++j)可以看出,它只在每一行内交换左右对称位置的像素,即只进行水平翻转。此外,如果你的训练数据是遥感图像或显微镜下的图像,可以在准备数据集时对训练集中的图像做垂直翻转或旋转一定角度(若为检测任务,需同步变换标注框),这同样能起到增加样本的作用。
/*
 * Mirror an image in place around its vertical axis (horizontal flip).
 *
 * Within every row of every channel, the element at column j is exchanged
 * with the element at column (w - 1 - j).  Rows and channels keep their
 * positions.  For an odd width the middle column is left untouched.
 */
void flip_image(image a)
{
    int channel, row;
    for (channel = 0; channel < a.c; ++channel) {
        for (row = 0; row < a.h; ++row) {
            // Offset of the first element of this row inside a.data.
            const int base = a.w * (row + a.h * channel);
            int left = 0;
            int right = a.w - 1;
            // Walk both cursors towards the centre, swapping as they go.
            while (left < right) {
                const float tmp = a.data[base + right];
                a.data[base + right] = a.data[base + left];
                a.data[base + left] = tmp;
                ++left;
                --right;
            }
        }
    }
}