多线程数据加载(以训练过程为例)
darknet的数据加载机制过程:
step1: 在darknet源码detector.c-->train_detector()-->load_data(args)首先创建并启动一个线程。
pthread_t load_thread = load_data(args); //读数据
load_data()函数首先启动一个线程,调用load_threads。load_threads是实际加载数据的线程,在load_threads加载完成前,主线程会等待。
//args:网络参数
/*
 * Spawn the master data-loading thread.
 *
 * `args` is copied onto the heap so the new thread owns an independent
 * load_args; the copy still carries the same args.d pointer, i.e. the
 * buffer that the loader fills and train_detector() later consumes.
 *
 * Returns the handle of the created thread. Ownership of the heap copy
 * passes to load_threads(), which frees it.
 */
pthread_t load_data(load_args args)
{
    load_args *heap_args = (load_args *)xcalloc(1, sizeof(struct load_args));
    *heap_args = args; /* shallow copy: heap_args->d aliases the caller's buffer */

    pthread_t loader;
    if (pthread_create(&loader, 0, load_threads, heap_args) != 0) {
        error("Thread creation failed");
    }
    return loader;
}
step2: 在load_threads中通过循环方式, 创建args.threads=64个线程, 每个线程分别都与函数run_thread_loop绑定.
/*
 * Master loader entry point (one invocation per load_data() call).
 *
 * Splits the requested batch of args.n samples across args.threads
 * permanent worker threads, waits for all of them to finish, and
 * concatenates their partial results into *args.d.
 *
 * ptr: heap-allocated load_args copy made by load_data(); freed here.
 * Uses file-scope state shared with run_thread_loop():
 *   threads, run_load_data, args_swap, mtx_load_data (and, in the
 *   workers, flag_exit).
 * Returns NULL (pthread entry-point convention).
 */
void *load_threads(void *ptr)
{
    //srand(time(0));
    int i;
    load_args args = *(load_args *)ptr; // local copy; args.d still points at the caller's output buffer
    if (args.threads == 0) args.threads = 1;
    data *out = args.d; // destination for the concatenated batch
    int total = args.n; // total number of samples requested for this batch
    free(ptr); // release the copy allocated in load_data()
    data* buffers = (data*)xcalloc(args.threads, sizeof(data)); // one partial-result slot per worker
    // Lazily create the permanent worker pool on the first call; it is
    // reused by every subsequent batch and never torn down here.
    if (!threads) {
        threads = (pthread_t*)xcalloc(args.threads, sizeof(pthread_t));
        run_load_data = (volatile int *)xcalloc(args.threads, sizeof(int));
        args_swap = (load_args *)xcalloc(args.threads, sizeof(load_args));
        fprintf(stderr, " Create %d permanent cpu-threads \n", args.threads);
        for (i = 0; i < args.threads; ++i) {
            int* ptr = (int*)xcalloc(1, sizeof(int)); // worker index; freed by run_thread_loop()
            *ptr = i;
            if (pthread_create(&threads[i], 0, run_thread_loop, ptr)) error("Thread creation failed");
        }
    }
    // Hand each worker its slice of the batch, then raise its run flag.
    for (i = 0; i < args.threads; ++i) {
        args.d = buffers + i; // worker i writes into its own buffer slot
        // Divide `total` as evenly as possible; the slice sizes sum to `total`
        // (NOT simply total/threads — remainders are spread across workers).
        args.n = (i + 1) * total / args.threads - i * total / args.threads;
        pthread_mutex_lock(&mtx_load_data);
        args_swap[i] = args; // publish the job under the lock; worker copies it out
        pthread_mutex_unlock(&mtx_load_data);
        custom_atomic_store_int(&run_load_data[i], 1); // run thread
    }
    // Poll until every worker clears its run flag — acts as a join.
    for (i = 0; i < args.threads; ++i) {
        while (custom_atomic_load_int(&run_load_data[i])) this_thread_sleep_for(thread_wait_ms); // join
    }
    // Merge the per-worker partial datasets into the caller's buffer.
    *out = concat_datas(buffers, args.threads);
    out->shallow = 0; // *out is treated as the owning copy of the merged data
    // Free the per-worker wrappers; shallow=1 so the underlying arrays
    // (now referenced via *out) are not released here.
    for(i = 0; i < args.threads; ++i){
        buffers[i].shallow = 1;
        free_data(buffers[i]);
    }
    free(buffers);
    //free(threads); // intentionally not freed: the worker pool is permanent
    return 0;
}
step3: 每个线程的run_thread_loop中调用load_thread()函数进行数据读取。
//run_thread_loop函数中启动线程,并调用load_thread函数,主线程返回load_threads中等待。
/*
 * Permanent worker-thread loop.
 *
 * Each worker owns one slot: it idles until load_threads() raises
 * run_load_data[slot], copies its job out of args_swap[slot] under the
 * mutex, runs load_thread() on it, then clears the flag to signal
 * completion. Exits (freeing its index argument) once flag_exit is set.
 *
 * ptr: heap-allocated int holding this worker's slot index; freed here.
 */
void *run_thread_loop(void *ptr)
{
    const int slot = *(int *)ptr;

    for (;;) {
        if (custom_atomic_load_int(&flag_exit)) break;

        /* Sleep-poll until a job is posted for this slot (or shutdown). */
        int shutting_down = 0;
        while (!custom_atomic_load_int(&run_load_data[slot])) {
            if (custom_atomic_load_int(&flag_exit)) {
                shutting_down = 1;
                break;
            }
            this_thread_sleep_for(thread_wait_ms);
        }
        if (shutting_down) break;

        /* Take a private copy of the posted job under the lock. */
        pthread_mutex_lock(&mtx_load_data);
        load_args *job = (load_args *)xcalloc(1, sizeof(load_args));
        *job = args_swap[slot];
        pthread_mutex_unlock(&mtx_load_data);

        load_thread(job); /* load_thread() frees `job` when done */

        custom_atomic_store_int(&run_load_data[slot], 0); /* signal completion */
    }

    free(ptr);
    return 0;
}
step4: load_thread函数调用load_data_detection函数加载数据。
/*
 * Dispatch a single load job to the loader matching its data type.
 *
 * ptr: heap-allocated load_args describing the job; freed here. The
 * result is written through a.d (or a.im / a.resized for image types),
 * which points into storage owned by the caller (e.g. the per-worker
 * buffer slot set up by load_threads()).
 */
void *load_thread(void *ptr)
{
    //srand(time(0));
    //printf("Loading data: %d\n", random_gen());
    load_args a = *(struct load_args*)ptr;

    /* Zero means "unset" for these augmentation factors; 1 is neutral. */
    if (a.exposure == 0) a.exposure = 1;
    if (a.saturation == 0) a.saturation = 1;
    if (a.aspect == 0) a.aspect = 1;

    switch (a.type) {
    case OLD_CLASSIFICATION_DATA:
        *a.d = load_data_old(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
        break;
    case CLASSIFICATION_DATA:
        *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.flip, a.min, a.max, a.w, a.h, a.angle, a.aspect, a.hue, a.saturation, a.exposure, a.mixup, a.blur, a.show_imgs, a.label_smooth_eps, a.dontuse_opencv, a.contrastive);
        break;
    case SUPER_DATA:
        *a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
        break;
    case WRITING_DATA:
        *a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
        break;
    case REGION_DATA:
        *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
        break;
    case DETECTION_DATA:
        /* Detection training path. a.d points at a caller-owned buffer
         * slot, so the result is stored directly with no extra alloc. */
        *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.c, a.num_boxes, a.truth_size, a.classes, a.flip, a.gaussian_noise, a.blur, a.mixup, a.jitter, a.resize,
            a.hue, a.saturation, a.exposure, a.mini_batch, a.track, a.augment_speed, a.letter_box, a.mosaic_bound, a.contrastive, a.contrastive_jit_flip, a.show_imgs);
        break;
    case SWAG_DATA:
        *a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter);
        break;
    case COMPARE_DATA:
        *a.d = load_data_compare(a.n, a.paths, a.m, a.classes, a.w, a.h);
        break;
    case IMAGE_DATA:
        *(a.im) = load_image(a.path, 0, 0, a.c);
        *(a.resized) = resize_image(*(a.im), a.w, a.h);
        break;
    case LETTERBOX_DATA:
        *(a.im) = load_image(a.path, 0, 0, a.c);
        *(a.resized) = letterbox_image(*(a.im), a.w, a.h);
        break;
    case TAG_DATA:
        *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.flip, a.min, a.max, a.w, a.h, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
        break;
    default:
        /* Unknown type: nothing to load (matches the original if/else chain). */
        break;
    }

    free(ptr);
    return 0;
}
step5: load_data_detection函数从所有训练图片中,随机读取n张,并对这n张图片进行数据增强,同时矫正增强后的数据标签信息。这个函数在下一节介绍