改进3:使用GIoU性能指标和损失函数
目的:提升网络性能
cfg/yolov3-voc-giou.cfg
......
[yolo]
mask = 6,7,8
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=20
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1
# GIoU-related keys below are read by parse_yolo() when ENABLE_GIOU is defined.
# iou_normalizer: stored in l.iou_normalizer (parse_yolo falls back to 0.75 if the key is absent).
iou_normalizer=0.5
# cls_normalizer: stored in l.cls_normalizer (parse_yolo falls back to 1 if the key is absent).
cls_normalizer=1.0
# iou_loss: "mse" selects MSE, "giou" selects GIOU, any other value selects IOU.
iou_loss=giou
......
[yolo]
mask = 3,4,5
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=20
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1
# GIoU-related keys below are read by parse_yolo() when ENABLE_GIOU is defined.
# iou_normalizer: stored in l.iou_normalizer (parse_yolo falls back to 0.75 if the key is absent).
iou_normalizer=0.5
# cls_normalizer: stored in l.cls_normalizer (parse_yolo falls back to 1 if the key is absent).
cls_normalizer=1.0
# iou_loss: "mse" selects MSE, "giou" selects GIOU, any other value selects IOU.
iou_loss=giou
......
[yolo]
mask = 0,1,2
anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326
classes=20
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1
# GIoU-related keys below are read by parse_yolo() when ENABLE_GIOU is defined.
# iou_normalizer: stored in l.iou_normalizer (parse_yolo falls back to 0.75 if the key is absent).
iou_normalizer=0.5
# cls_normalizer: stored in l.cls_normalizer (parse_yolo falls back to 1 if the key is absent).
cls_normalizer=1.0
# iou_loss: "mse" selects MSE, "giou" selects GIOU, any other value selects IOU.
iou_loss=giou
option_list.h
#ifdef ENABLE_GIOU
/*
 * Look up `key` in option list `l`.
 * Returns the value found by option_find(), or `def` when that lookup
 * returns NULL. The function itself prints nothing on fallback.
 */
char *option_find_str_quiet(list *l, char *key, char *def);
#endif //ENABLE_GIOU
option_list.c
#ifdef ENABLE_GIOU
/*
 * Fetch the string stored under `key` in option list `l`.
 *
 * Returns whatever option_find() yields when it is non-NULL; otherwise
 * returns the caller-supplied default `def` (which may itself be NULL).
 * No message is printed when the default is used.
 */
char *option_find_str_quiet(list *l, char *key, char *def)
{
    char *found = option_find(l, key);
    return found ? found : def;
}
#endif //ENABLE_GIOU
parser.c
/*
 * Build a YOLO layer from one [yolo] section of a network cfg.
 *
 * options: key/value list for the section being parsed.
 * params:  batch, w and h are forwarded to make_yolo_layer(); inputs is
 *          checked (via assert) against the outputs of the created layer.
 *
 * Returns the configured layer by value.
 *
 * When ENABLE_GIOU is defined, the keys "iou_normalizer", "cls_normalizer"
 * and "iou_loss" are also read; "iou_loss" accepts "mse" or "giou" and
 * falls back to IOU for any other value.
 */
layer parse_yolo(list *options, size_params params)
{
    int classes = option_find_int(options, "classes", 20);
    int total = option_find_int(options, "num", 1);
    int num = total;

    char *a = option_find_str(options, "mask", 0);
    int *mask = parse_yolo_mask(a, &num);
    layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes);
    assert(l.outputs == params.inputs);

#ifdef ENABLE_GIOU
    l.iou_normalizer = option_find_float_quiet(options, "iou_normalizer", 0.75);
    printf("iou_normalizer is %f\n", l.iou_normalizer);
    l.cls_normalizer = option_find_float_quiet(options, "cls_normalizer", 1);
    printf("cls_normalizer is %f\n", l.cls_normalizer);

    char *iou_loss = option_find_str_quiet(options, "iou_loss", "iou");
    printf("loss param is %s\n", iou_loss);
    if (strcmp(iou_loss, "mse") == 0) {
        printf("loss is set to MSE\n");
        l.iou_loss = MSE;
    } else if (strcmp(iou_loss, "giou") == 0) {
        printf("loss is set to GIOU\n");
        l.iou_loss = GIOU;
    } else {
        /* Any unrecognised value selects plain IOU. */
        printf("loss is set to IOU\n");
        l.iou_loss = IOU;
    }
    fprintf(stderr, "Yolo layer params: iou loss: %s, iou_normalizer: %f, cls_normalizer: %f\n", (l.iou_loss==MSE?"mse":(l.iou_loss==GIOU?"giou":"iou")), l.iou_normalizer, l.cls_normalizer);
#endif //ENABLE_GIOU

    l.max_boxes = option_find_int_quiet(options, "max",90);
    l.jitter = option_find_float(options, "jitter", .2);
    l.ignore_thresh = option_find_float(options, "ignore_thresh", .5);
    l.truth_thresh = option_find_float(options, "truth_thresh", 1);
    l.random = option_find_int_quiet(options, "random", 0);

    char *map_file = option_find_str(options, "map", 0);
    if (map_file) l.map = read_map(map_file);

    a = option_find_str(options, "anchors", 0);
    if (a) {
        /*
         * Store one bias per comma-separated field (number of commas + 1).
         * The walk stops as soon as strchr() finds no further comma, so a
         * NULL result is never used in pointer arithmetic.
         *
         * NOTE(review): l.biases is written without a bounds check; its
         * capacity is set inside make_yolo_layer(), which is not visible
         * here -- confirm the anchors list cannot exceed it.
         */
        int i = 0;
        for (;;) {
            l.biases[i++] = atof(a);
            a = strchr(a, ',');
            if (!a) break;
            ++a;
        }
    }
    return l;
}
darknet.h
#ifndef ENABLE_GIOU
#define ENABLE_GIOU
#endif
#ifdef ENABLE_GIOU
/* Selects which box-regression loss a layer uses (see parse_yolo's "iou_loss" key). */
typedef enum {
    IOU,
    GIOU,
    MSE
} IOU_LOSS;
/* A box described by the coordinates of its four edges. */
typedef struct boxabs {
    float left;
    float right;
    float top;
    float bot;
} boxabs;
/*
 * Four per-edge float values, returned by dx_box_iou().
 * NOTE(review): presumably derivatives w.r.t. top/bottom/left/right -- confirm
 * against the dx_box_iou implementation.
 */
typedef struct dxrep {
    float dt;
    float db;
    float dl;
    float dr;
} dxrep;
/*
 * Bundles an IoU value and a GIoU value with one dxrep for each.
 * NOTE(review): dx_iou / dx_giou presumably hold the matching gradients --
 * confirm where this struct is filled in.
 */
typedef struct ious {
    float iou;
    float giou;
    dxrep dx_iou;
    dxrep dx_giou;
} ious;
#endif //ENABLE_GIOU
struct layer {
......
#ifdef ENABLE_GIOU
/* Set by parse_yolo from the cfg key "iou_normalizer" (0.75 when the key is absent). */
float iou_normalizer;
/* Set by parse_yolo from the cfg key "cls_normalizer" (1 when the key is absent). */
float cls_normalizer;
/* Loss variant chosen by parse_yolo from the cfg key "iou_loss": MSE, GIOU or IOU. */
IOU_LOSS iou_loss;
#endif
......
}
box.h
#ifdef ENABLE_GIOU
/* NOTE(review): presumably the intersection-over-union of a and b; implementation not shown here. */
float box_iou(box a, box b);
/*
 * Returns a dxrep for boxes a and b under the given loss variant.
 * NOTE(review): presumably the per-edge gradient of the selected IoU measure -- confirm in box.c.
 */
dxrep dx_box_iou(box a, box b, IOU_LOSS iou_loss);
/* NOTE(review): presumably the generalized IoU (GIoU) of a and b; implementation not shown here. */
float box_giou(box a, box b);
/* Converts a box from x, y, w, h form to its top/bottom/left/right edge representation. */
boxabs to_tblr(box a);
#endif
box.c
#ifdef ENABLE_GIOU
/*
 * Compute c, the smallest box that fully encloses both a and b.
 *
 * Each input is treated as centred on (x, y) with extent (w, h): its edges
 * are taken as x -/+ w/2 and y -/+ h/2. The result takes the minimum of the
 * two top and left edges and the maximum of the two bottom and right edges.
 */
boxabs box_c(box a, box b)
{
    boxabs hull;

    hull.left  = fmin(a.x - a.w / 2, b.x - b.w / 2);
    hull.right = fmax(a.x + a.w / 2, b.x + b.w / 2);
    hull.top   = fmin(a.y - a.h / 2, b.y - b.h / 2);
    hull.bot   = fmax(a.y + a.h / 2, b.y + b.h / 2);

    return hull;
}
// representation from x, y, w, h to top, left, bottom, right
boxabs to_tblr(box a) {
boxabs tblr = {
0 };
float t = a.y - (a.h / 2);
float b = a.y + (a