由于不知道为什么focal loss要用if,因此直接将if置为1
在yolo_layer.c中
delta_yolo_class 计算类别误差
void delta_yolo_class(float *output, float *delta, int index, int class, int classes, int stride, float *avg_cat)
{
int n;
if (delta[index]){ //应该不会进入这个判断,因为 delta[index] 初值为0
delta[index + stride*class] = 1 - output[index + stride*class];
if(avg_cat) *avg_cat += output[index + stride*class];
return;
}
for(n = 0; n < classes; ++n){ //对所有类别,如果预测正确,则误差为 1-predict,否则为 0-predict
delta[index + stride*n] = ((n == class)?1 : 0) - output[index + stride*n];
if(n == class && avg_cat) *avg_cat += output[index + stride*n];
}
}
修改后如下:
void delta_yolo_class(float *output, float *delta, int index, int class, int classes, int stride, float *avg_cat)
{
int n;
if (delta[index]){
delta[index + stride*class] = 1 - output[index + stride*class];
if(avg_cat) *avg_cat += output[index + stride*class];
return;
}
// Focal loss
if (1) {
// Focal Loss
float alpha = 0.5; // 0.25 or 0.5
//float gamma = 2; // hardcoded in many places of the grad-formula
int ti = index + stride*class;
float pt = output[ti] + 0.000000000000001F;
// http://fooplot.com/#W3sidHlwZSI6MCwiZXEiOiItKDEteCkqKDIqeCpsb2coeCkreC0xKSIsImNvbG9yIjoiIzAwMDAwMCJ9LHsidHlwZSI6MTAwMH1d
float grad = -(1 - pt) * (2 * pt*logf(pt) + pt - 1); // http://blog.csdn.net/linmingan/article/details/77885832
//float grad = (1 - pt) * (2 * pt*logf(pt) + pt - 1); // https://github.com/unsky/focal-loss
for (n = 0; n < classes; ++n) {
delta[index + stride*n] = (((n == class) ? 1 : 0) - output[index + stride*n]);
delta[index + stride*n] *= alpha*grad;
if (n == class) *avg_cat += output[index + stride*n];
}
}
else{
for(n = 0; n < classes; ++n){
delta[index + stride*n] = ((n == class)?1 : 0) - output[index + stride*n];
if(n == class && avg_cat) *avg_cat += output[index + stride*n];
}
}
}