5、想把yolov3的模型转换为caffe
使用工具:pytorch-caffe-darknet-convert
注意事项:
caffe中必须已经添加了
upsample_layer.cpp
upsample_layer.cu
upsample_layer.hpp
message UpsampleParameter{
optional int32 scale = 1 [default = 1];
}
然后使用yolov3_darknet2caffe.py
python yolov3_darknet2caffe.py yolov3.cfg yolov3.weights yolov3.prototxt yolov3.caffemodel
error
ValueError: need more than 1 value to unpack
出错不能转换,需要排查错误:原因是 yolov3.cfg 中 [net] 段的 Training 配置(batch/subdivisions 的训练值)未注释掉,正确做法是注释掉 Training 部分、只保留 Testing 配置,修改后即可正常转换
6.多标签分类 0 1 0 1 0 0 0 0 255 255 255
没有加入ignore_label,导致loss为nan或者loss超级大,正确配置示例如下:
# Multi-label classification loss: one sigmoid cross-entropy term per class.
layer {
name: "cross_entropy"
type: "SigmoidCrossEntropyLoss"
# bottom[0]: per-class logits; bottom[1]: 0/1 label vector (255 marks "ignore")
bottom: "pedestrian-conv1000"
bottom: "label"
top: "cross_entropy"
loss_param {
# Elements whose label equals 255 are excluded from both the loss sum
# and the normalizer — without this, the 255 labels blow up the loss (NaN/huge).
ignore_label: 255
}
}
template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Forward_cpu(
    const vector<Blob<Dtype>*>& bottom, const vector<Blob<Dtype>*>& top) {
  // Run the wrapped sigmoid layer so sigmoid_output_ holds p = sigmoid(x)
  // for use in the backward pass.
  sigmoid_bottom_vec_[0] = bottom[0];
  sigmoid_layer_->Forward(sigmoid_bottom_vec_, sigmoid_top_vec_);
  // Accumulate the negative log-likelihood directly from the raw logits,
  // using the overflow-safe form  max(x,0) - x*t + log(1 + exp(-|x|)).
  const Dtype* logits = bottom[0]->cpu_data();
  const Dtype* labels = bottom[1]->cpu_data();
  const int count = bottom[0]->count();
  Dtype loss = 0;
  int valid_count = 0;
  for (int i = 0; i < count; ++i) {
    const int label_value = static_cast<int>(labels[i]);
    // Ignored elements contribute neither to the loss nor to valid_count.
    if (has_ignore_label_ && label_value == ignore_label_) {
      continue;
    }
    const Dtype x = logits[i];
    loss -= x * (labels[i] - (x >= 0)) -
        log(1 + exp(x - 2 * x * (x >= 0)));
    ++valid_count;
  }
  // Normalize by the configured scheme over non-ignored elements only.
  normalizer_ = get_normalizer(normalization_, valid_count);
  top[0]->mutable_cpu_data()[0] = loss / normalizer_;
}
template <typename Dtype>
void SigmoidCrossEntropyLossLayer<Dtype>::Backward_cpu(
    const vector<Blob<Dtype>*>& top, const vector<bool>& propagate_down,
    const vector<Blob<Dtype>*>& bottom) {
  // Gradients with respect to the label blob are not defined for this loss.
  if (propagate_down[1]) {
    LOG(FATAL) << this->type()
               << " Layer cannot backpropagate to label inputs.";
  }
  if (!propagate_down[0]) {
    return;
  }
  // dL/dx = sigmoid(x) - t, computed as one vectorized subtraction.
  const int count = bottom[0]->count();
  const Dtype* labels = bottom[1]->cpu_data();
  Dtype* diff = bottom[0]->mutable_cpu_diff();
  caffe_sub(count, sigmoid_output_->cpu_data(), labels, diff);
  // Ignored elements must not propagate any gradient.
  if (has_ignore_label_) {
    for (int i = 0; i < count; ++i) {
      if (static_cast<int>(labels[i]) == ignore_label_) {
        diff[i] = 0;
      }
    }
  }
  // Scale by the top gradient over the normalizer computed in Forward_cpu.
  const Dtype loss_weight = top[0]->cpu_diff()[0] / normalizer_;
  caffe_scal(count, loss_weight, diff);
}
7.预训练模型尽量加上,会快速降低loss
如果是目标检测的预训练模型,也可以用在目标分类上
8.可以尝试对网络剪枝
比如yolov3-tiny通道数目减半,只要训练数据量足够,通常不会明显影响网络整体的效果