#include <stdlib.h>
#include <string.h>
#include "darknet.h"
#include "network.h"
#include "region_layer.h"
#include "cost_layer.h"
#include "utils.h"
#include "parser.h"
#include "box.h"
#include "demo.h"
#include "option_list.h"
/*
 * Fallback declaration of the comparison-callback type used with
 * qsort()/bsearch(): a pointer to a function that receives two
 * `const void *` elements and returns an int.
 *
 * The __COMPAR_FN_T guard means the typedef is only emitted when an earlier
 * header has not already provided it (glibc's <stdlib.h> uses this same
 * guard macro). `comparison_fn_t` is added as an alias only when __USE_GNU
 * is defined.
 */
#ifndef __COMPAR_FN_T
#define __COMPAR_FN_T
typedef int (*__compar_fn_t)(const void *, const void *);
#ifdef __USE_GNU
typedef __compar_fn_t comparison_fn_t;
#endif
#endif
#include “http_stream.h”
/*
 * File-wide integer switch, off (0) by default.
 * NOTE(review): the code that reads or sets it is not visible in this part
 * of the file -- confirm its meaning at the use sites.
 */
int check_mistakes = 0;

/*
 * Table of 80 ids in strictly ascending order, taken from the range 1..90
 * with gaps (12, 26, 29, 30, 45, 66, 68, 69, 71 and 83 are absent).
 * NOTE(review): presumably indexed by the network's contiguous class number
 * to obtain the category id used in COCO annotations -- confirm at the use
 * sites.
 */
static int coco_ids[] = {
     1,  2,  3,  4,  5,  6,  7,  8,  9, 10,
    11, 13, 14, 15, 16, 17, 18, 19, 20, 21,
    22, 23, 24, 25, 27, 28, 31, 32, 33, 34,
    35, 36, 37, 38, 39, 40, 41, 42, 43, 44,
    46, 47, 48, 49, 50, 51, 52, 53, 54, 55,
    56, 57, 58, 59, 60, 61, 62, 63, 64, 65,
    67, 70, 72, 73, 74, 75, 76, 77, 78, 79,
    80, 81, 82, 84, 85, 86, 87, 88, 89, 90
};
/*
 * GetFilename - return the last component of a path with its trailing
 * four characters (e.g. ".jpg", ".png", ".txt") removed.
 *
 * p: NUL-terminated path. Both '\\' and '/' are accepted as separators;
 *    a path without any separator is treated as a bare file name.
 *    NULL yields an empty string.
 *
 * Returns a pointer to an internal static buffer of 100 bytes. The result
 * is always NUL-terminated, is truncated to 99 characters if necessary, and
 * is overwritten by the next call (so the function is not reentrant and the
 * caller must not free the pointer). A last component of four characters or
 * fewer produces an empty string.
 */
char *GetFilename(char *p)
{
    enum { EXT_LEN = 4 };               /* characters dropped from the end */
    static char name[100] = { "" };

    /* Reset first so that no text from a previous call can survive. */
    name[0] = '\0';
    if (!p) {
        return name;
    }

    /* Begin right after whichever separator occurs last, if there is one. */
    char *start = p;
    char *backslash = strrchr(p, '\\');
    char *slash = strrchr(p, '/');
    if (backslash) {
        start = backslash + 1;
    }
    if (slash && slash + 1 > start) {
        start = slash + 1;
    }

    /* Drop the extension without letting the unsigned length wrap around. */
    size_t len = strlen(start);
    len = (len > (size_t)EXT_LEN) ? len - (size_t)EXT_LEN : 0;

    /* Never write past the static buffer; keep one byte for the NUL. */
    if (len > sizeof(name) - 1) {
        len = sizeof(name) - 1;
    }

    memcpy(name, start, len);
    name[len] = '\0';
    return name;
}
void train_detector(char *datacfg, char *cfgfile, char *weightfile, int gpus, int ngpus, int clear, int dont_show, int calc_map, int mjpeg_port, int show_imgs, int benchmark_layers, char chart_path)
{
list *options = read_data_cfg(datacfg);
char *train_images = option_find_str(options, “train”, “data/train.txt”);
char *valid_images = option_find_str(options, “valid”, train_images);
char *backup_directory = option_find_str(options, “backup”, “/backup/”);
network net_map;
if (calc_map) {
FILE* valid_file = fopen(valid_images, "r");
if (!valid_file) {
printf("\n Error: There is no %s file for mAP calculation!\n Don't use -map flag.\n Or set valid=%s in your %s file. \n", valid_images, train_images, datacfg);
getchar();
exit(-1);
}
else fclose(valid_file);
cuda_set_device(gpus[0]);
printf(" Prepare additional network for mAP calculation...\n");
net_map = parse_network_cfg_custom(cfgfile, 1, 1);
net_map.benchmark_layers = benchmark_layers;
const int net_classes = net_map.layers[net_map.n - 1].classes;
int k; // free memory unnecessary arrays
for (k = 0; k < net_map.n - 1; ++k) free_layer_custom(net_map.layers[k], 1);
char *name_list = option_find_str(options, "names", "data/names.list");
int names_size = 0;
char **names = get_labels_custom(name_list, &names_size);
if (net_classes != names_size) {
printf("\n Error: in the file %s number of names %d that isn't equal to classes=%d in the file %s \n",
name_list, names_size, net_classes, cfgfile);
if (net_classes > names_size) getchar();
}
free_ptrs((void**)names, net_map.layers[net_map.n - 1].classes);
}
srand(time(0));
char *base = basecfg(cfgfile);
printf("%s\n", base);
float avg_loss = -1;
network* nets = (network*)xcalloc(ngpus, sizeof(network));
srand(time(0));
int seed = rand();
int k;
for (k = 0; k < ngpus; ++k) {
srand(seed);
#ifdef GPU
cuda_set_device(gpus[k]);
#endif
nets[k] = parse_network_cfg(cfgfile);
nets[k].benchmark_layers = benchmark_layers;
if (weightfile) {
load_weights(&nets[k], weightfile);
}
if (clear) {
*nets[k].seen = 0;
*nets[k].cur_iteration = 0;
}
nets[k].learning_rate *= ngpus;
}
srand(time(0));
network net = nets[0];
const int actual_batch_size = net.batch * net.subdivisions;
if (actual_batch_size == 1) {
printf("\n Error: You set incorrect value batch=1 for Training! You should set batch=64 subdivision=64 \n");
getchar();
}
else if (actual_batch_size < 8) {
printf("\n Warning: You set batch=%d lower than 64! It is recommended to set batch=64 subdivision=64 \n", actual_batch_size);
}
int imgs = net.batch * net.subdivisions * ngpus;
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
data train, buffer;
layer l = net.layers[net.n - 1];
int classes = l.classes;
float jitter = l.jitter;
list *plist = get_paths(train_images);
int train_images_num = plist->size;
char **paths = (char **)list_to_array(plist);
const int init_w = net.w;
const int init_h = net.h;
const int init_b = net.batch;
int iter_save, iter_save_last, iter_map;
iter_save = get_current_iteration(net);
iter_save_last = get_current_iteration(net);
iter_map = get_current_iteration(net);
float mean_average_precision = -1;
float best_map = mean_average_precision;
load_args args = { 0 };
args.w = net.w;
args.h = net.h;
args.c = net.c;
args.paths = paths;
args.n = imgs;
args.m = plist->size;
args.classes = classes;
args.flip = net.flip;
args.jitter = jitter;
args.num_boxes = l.max_boxes;
net.num_boxes = args.num_boxes;
net.train_images_num = train_images_num;
args.d = &buffer;
args.type = DETECTION_DATA;
args.threads = 64; // 16 or 64
args.angle = net.angle;
args.gaussian_noise = net.gaussian_noise;
args.blur = net.blur;
args.mixup = net.mixup;
args.exposure = net.exposure;
args.saturation = net.saturation;
args.hue = net.hue;
args.letter_box = net.letter_box;
if (dont_show && show_imgs) show_imgs = 2;
args.show_imgs = show_imgs;
#ifdef OPENCV
args.threads = 6 * ngpus; // 3 for - Amazon EC2 Tesla V100: p3.2xlarge (8 logical cores) - p3.16xlarge
//args.threads = 12 * ngpus; // Ryzen 7 2700X (16 logical cores)
mat_cv* img = NULL;
float max_img_loss = 5;
int number_of_lines = 100;
int img_size = 1000;
char windows_name[100];
sprintf(windows_name, “chart_%s.png”, base);
img = draw_train_chart(windows_name, max_img_loss, net.max_batches, number_of_lines, img_size, dont_show, chart_path);
#endif //OPENCV
if (net.track) {
args.track = net.track;
args.augment_speed = net.augment_speed;
if (net.sequential_subdivisions) args.threads = net.sequential_subdivisions * ngpus;
else args.threads = net.subdivisions * ngpus;
args.mini_batch = net.batch / net.time_steps;
printf("\n Tracking! batch = %d, subdiv = %d, time_steps = %d, mini_batch = %d \n", net.batch, net.subdivisions, net.time_steps, args.mini_batch);
}
//printf(" imgs = %d \n", imgs);
pthread_t load_thread = load_data(args);
int count = 0;
double time_remaining, avg_time = -1, alpha_time = 0.01;
//while(i*imgs < N*120){
while (get_current_iteration(net) < net.max_batches) {
if (l.random && count++ % 10 == 0) {
float rand_coef = 1.4;
if (l.random != 1.0) rand_coef = l.random;
printf("Resizing, random_coef = %.2f \n", rand_coef);
float random_val = rand_scale(rand_coef); // *x or /x
int dim_w = roundl(random_val*init_w / net.resize_step + 1) * net.resize_step;
int dim_h = roundl(random_val*init_h / net.resize_step + 1) * net.resize_step;
if (random_val < 1 && (dim_w > init_w || dim_h > init_h)) dim_w = init_w, dim_h = init_h;
int max_dim_w = roundl(rand_coef*init_w / net.resize_step + 1) * net.resize_step;
int max_dim_h = roundl(rand_coef*init_h / net.resize_step + 1) * net.resize_step;
// at the beginning (check if enough memory) and at the end (calc rolling mean/variance)
if (avg_loss < 0 || get_current_iteration(net) > net.max_batches - 100) {
dim_w = max_dim_w;
dim_h = max_dim_h;
}
if (dim_w < net.resize_step) dim_w = net.resize_step;
if (dim_h < net.resize_step) dim_h = net.resize_step;
int dim_b = (init_b * max_dim_w * max_dim_h) / (dim_w * dim_h);
int new_dim_b = (int)(dim_b * 0.8);
if (new_dim_b > init_b) dim_b = new_dim_b;
args.w = dim_w;
args.h = dim_h;
int k;
if (net.dynamic_minibatch) {
for (k = 0; k < ngpus; ++k) {
(*nets[k].seen) = init_b * net.subdivisions * get_current_iteration(net); // remove this line, when you will save to weights-file both: seen & cur_iteration
nets[k].batch = dim_b;
int j;
for (j = 0; j < nets[k].n; ++j)
nets[k].layers[j].batch = dim_b;
}
net.batch = dim_b;
imgs = net.batch * net.subdivisions * ngpus;
args.n = imgs;
printf("\n %d x %d (batch = %d) \n", dim_w, dim_h, net.batch);
}
else
printf("\n %d x %d \n", dim_w, dim_h);
pthread_join(load_thread, 0);
train = buffer;
free_data(train);
load_thread = load_data(args);
for (k = 0; k < ngpus; ++k) {
resize_network(nets + k, dim_w, dim_h);
}
net = nets[0];
}
double time = what_time_is_it_now();
pthread_join(load_thread, 0);
train = buffer;
if (net.track) {
net.sequential_subdivisions = get_current_seq_subdivisions(net);
args.threads = net.sequential_subdivisions * ngpus;
printf(" sequential_subdivisions = %d, sequence = %d \n", net.sequential_subdivisions, get_sequence_value(net));
}
load_thread = load_data(args);
/*
int k;
for(k = 0; k < l.max_boxes; ++k){
box b = float_to_box(train.y.vals[10] + 1 + k*5);
if(!b.x) break;
printf("loaded: %f %f %f %f\n", b.x, b.y, b.w, b.h);
}
image im = float_to_image(448, 448, 3, train.X.vals[10]);
int k;
for(k = 0; k < l.max_boxes; ++k){
box b = float_to_box(train.y.vals[10] + 1 + k*5);
printf("%d %d %d %d\n", truth.x, truth.y, truth.w, truth.h);
draw_bbox(im, b, 8, 1,0,0);
}
save_image(im, "truth11");
*/
const double load_time = (what_time_is_it_now() - time);
printf("Loaded: %lf seconds", load_time);
if (load_time > 0.1 && avg_loss > 0) printf(" - performance bottleneck on CPU or Disk HDD/SSD");
printf("\n");
time = what_time_is_it_now();
float loss = 0;
#ifdef GPU
if (ngpus == 1) {
int wait_key = (dont_show) ? 0 : 1;
loss = train_network_waitkey(net, train, wait_key);
}
else {
loss = train_networks(nets, ngpus, train, 4);
}
#else
loss = train_network(net, train);
#endif
if (avg_loss < 0 || avg_loss != avg_loss) avg_loss = loss; // if(-inf or nan)
avg_loss = avg_loss*.9 + loss*.1;
const int iteration = get_current_iteration(net);
//i = get_current_batch(net);
int calc_map_for_each = 4 * train_images_num / (net.batch * net.subdivisions); // calculate mAP for each 4 Epochs
calc_map_for_each = fmax(calc_map_for_each, 100);
int next_map_calc = iter_map + calc_map_for_each;
next_map_calc = fmax(next_map_calc, net.burn_in);
//next_map_calc = fmax(next_map_calc, 400);
if (calc_map) {
printf("\n (next mAP calculation at %d iterations) ", next_map_calc);
if (mean_average_precision > 0) printf("\n Last accuracy mAP@0.5 = %2.2f %%, best = %2.2f %% ", mean_average_precision * 100, best_map * 100);
}
if (net.cudnn_half) {
if (iteration < net.burn_in * 3) fprintf(stderr, "\n Tensor Cores are disabled until the first %d iterations are reached.", 3 * net.burn_in);
else fprintf(stderr, "\n Tensor Cores are used.");
}
printf("\n %d: %f, %f avg loss, %f rate, %lf seconds, %d images, %f hours left\n", iteration, loss, avg_loss, get_current_rate(net), (what_time_is_it_now() - time), iteration*imgs, avg_time);
int draw_precision = 0;
if (calc_map && (iteration >= next_map_calc || iteration == net.max_batches)) {
if (l.random) {
printf("Resizing to initial size: %d x %d ", init_w, init_h);
args.w = init_w;
args.h = init_h;
int k;
if (net.dynamic_minibatch) {
for (k = 0; k < ngpus; ++k) {
for (k = 0; k < ngpus; ++k) {
nets[k].batch = init_b;
int j;
for (j = 0; j < nets[k].n; ++j)
nets[k].layers[j].batch = init_b;
}
}
net.batch = init_b;
imgs = init_b * net.subdivisions * ngpus;
args.n = imgs;
printf("\n %d x %d (batch = %d) \n", init_w, init_h, init_b);
}
pthread_join(load_thread, 0);
free_data(train);
train = buffer;
load_thread = load_data(args);
for (k = 0; k < ngpus; ++k) {
resize_network(nets + k, init_w, init_h);
}
net = nets[0];
}
copy_weights_net(net, &net_map);
// combine Training and Validation networks
//network net_combined = combine_train_valid_networks(net, net_map);
iter_map = iteration;
mean_average_precision = validate_detector_map(datacfg, cfgfile, weightfile, 0.25, 0.5, 0, net.letter_box, &net_map);// &net_combined);
printf("\n mean_average_precision (mAP@0.5) = %f \n", mean_average_precision);
if (mean_average_precision > best_map) {
best_map = mean_average_precision;
printf("New best mAP!\n");
char buff[256];
sprintf(buff, "%s/%s_best.weights", backup_directory, base);
save_weights(net, buff);
}
draw_precision = 1;
}
time_remaining = (net.max_batches - iteration)*(what_time_is_it_now() - time + load_time) / 60 / 60;
// set initial value, even if resume training from 10000 iteration
if (avg_time < 0) avg_time = time_remaining;
else avg_time = alpha_time * time_remaining + (1 - alpha_time) * avg_time;
#ifdef OPENCV
draw_train_loss(windows_name, img, img_size, avg_loss, max_img_loss, iteration, net.max_batches, mean_average_precision, draw_precision, “mAP%”, dont_show, mjpeg_port, avg_time);
#endif // OPENCV
//if (i % 1000 == 0 || (i < 1000 && i % 100 == 0)) {
//if (i % 100 == 0) {
if (iteration >= (iter_save + 1000) || iteration % 1000 == 0) {
iter_save = iteration;
#ifdef GPU
if (ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
char buff[256];
sprintf(buff, “%s/%s_%d.weights”, backup_directory, base, iteration);
save_weights(net, buff);
}
if (iteration >= (iter_save_last + 100) || (iteration % 100 == 0 && iteration > 1)) {
iter_save_last = iteration;
#ifdef GPU
if (ngpus != 1) sync_nets(nets, ngpus, 0);
#endif
char buff[256];
sprintf(buff, “%s/%s_last.weights”, backup_directory, base);