网络结构采用的是 Yushiqi 老师开源的人脸检测网络。
四个用于检测的特征层信息如下:
层 | 尺寸 | min_size(1:1) | priorboxes |
---|---|---|---|
conv3_3 | 40x40 | 10,16,24 | 40x40x3=4800 |
conv4_3 | 20x20 | 32,48 | 20x20x2=800 |
conv5_3 | 10x10 | 64,96 | 10x10x2=200 |
conv6_3 | 5x5 | 128,192,256 | 5x5x3=75 |
下面是可视化的代码:
/**
 * Visualize the default (prior) boxes produced by the given PriorBox blobs.
 *
 * Runs one forward pass of the network on `img`, then, for every name in
 * `layer_names`, reads the prior-box blob and draws the prior boxes attached
 * to the central cell of the matching feature map onto a 320x320 copy of
 * `img`. Each box is drawn in a random colour.
 *
 * @param img          Input image; it is fed to the network and also used as
 *                     the drawing background (after resizing).
 * @param layer_names  Names of the prior-box blobs to visualize. The first 12
 *                     characters of each name must be the name of the feature
 *                     blob the priors were generated from
 *                     (NOTE(review): e.g. "conv3_3_norm" + "_mbox_priorbox" —
 *                     confirm against the deploy prototxt).
 * @return The 320x320 image with the selected prior boxes drawn on it.
 */
cv::Mat Classifier::visualize_priorbox(const cv::Mat& img,vector<string> layer_names)
{
    // Side length (pixels) of the square canvas the normalized boxes are drawn on.
    const int kCanvasSize = 320;
    // Number of leading characters of a prior-box blob name that form the
    // name of the corresponding feature (normalize) blob.
    const size_t kConvNameLength = 12;

    Blob<float>* input_layer = net_->input_blobs()[0];
    input_layer->Reshape(1, num_channels_, input_geometry_.height, input_geometry_.width);
    net_->Reshape();

    std::vector<cv::Mat> input_channels;
    WrapInputLayer(&input_channels);
    Preprocess(img, &input_channels);
    net_->Forward();

    cv::Mat new_img;
    cv::resize(img, new_img, cv::Size(kCanvasSize, kCanvasSize));

    for (size_t layer_idx = 0; layer_idx < layer_names.size(); ++layer_idx)
    {
        const std::string& layer_name = layer_names[layer_idx];
        // CHECK instead of assert: the lookup must also be validated in
        // release builds, otherwise a missing blob is dereferenced below.
        CHECK(net_->has_blob(layer_name)) << "Unknown prior box blob: " << layer_name;
        std::cout << "提取的prior层名:" << layer_name << std::endl;
        boost::shared_ptr<Blob<float> > priorBlob = net_->blob_by_name(layer_name);
        std::cout << "形状信息为:" << priorBlob->shape_string() << std::endl;

        // Take the leading part of the name to find the matching normalize layer.
        const std::string conv_layer = layer_name.substr(0, kConvNameLength);
        std::cout << conv_layer << std::endl;
        CHECK(net_->has_blob(conv_layer)) << "Unknown feature blob: " << conv_layer;
        std::cout << "提取的conv层名:" << conv_layer << std::endl;
        boost::shared_ptr<Blob<float> > convBlob = net_->blob_by_name(conv_layer);
        std::cout << "形状信息为:" << convBlob->shape_string() << std::endl;

        // Total number of prior boxes in this blob (each box takes 4 values).
        const int num_priors_ = priorBlob->height() / 4;
        const int conv_w = convBlob->width();
        const int conv_h = convBlob->height();
        CHECK_GT(conv_w * conv_h, 0) << "Feature blob " << conv_layer << " has an empty grid";
        // Number of prior boxes attached to every cell of the feature map.
        const int num_priors_grid = num_priors_ / (conv_w * conv_h);
        std::cout << "priorbox nums:" << num_priors_ << std::endl;

        const float* prior_data = priorBlob->cpu_data();
        vector<NormalizedBBox> prior_bboxes;
        vector<vector<float> > prior_variances;
        // NOTE(review): presumably Caffe-SSD's bbox_util helper that decodes
        // the raw blob into boxes and variances — confirm.
        GetPriorBBoxes(prior_data, num_priors_, &prior_bboxes, &prior_variances);

        // Pick the cell in the middle of the feature map. The index is built
        // from the cell's row/column so it always lands on the first prior of
        // that cell, for odd grid sizes (e.g. 5x5) as well as even ones.
        // Assumes priors are stored row by row with the priors of one cell
        // stored contiguously — TODO confirm against the PriorBox layer.
        const int center_cell = (conv_h / 2) * conv_w + (conv_w / 2);
        const int start = center_cell * num_priors_grid;
        CHECK_LE(start + num_priors_grid, static_cast<int>(prior_bboxes.size()))
            << "Prior box index out of range for blob " << layer_name;
        std::cout << "start from " << start << " priorbox" << std::endl;

        for (int prior_idx = start; prior_idx < start + num_priors_grid; ++prior_idx)
        {
            const NormalizedBBox& bbox = prior_bboxes[prior_idx];
            // Box coordinates are scaled by the canvas size to get pixels.
            const int xmin = static_cast<int>(bbox.xmin() * kCanvasSize);
            const int ymin = static_cast<int>(bbox.ymin() * kCanvasSize);
            const int xmax = static_cast<int>(bbox.xmax() * kCanvasSize);
            const int ymax = static_cast<int>(bbox.ymax() * kCanvasSize);
            const cv::Point pt1(xmin, ymin);
            const cv::Point pt2(xmax, ymax);
            std::cout << xmin << " " << ymin << " " << xmax << " " << ymax << std::endl;

            // Random colour per box so overlapping boxes can be told apart.
            const int b = rand() % 255 + 1;
            const int g = rand() % 255 + 1;
            const int r = rand() % 255 + 1;
            cv::rectangle(new_img, pt1, pt2, cv::Scalar(b, g, r), 1, 8, 0);
        }
    }
    return new_img;
}
主函数调用:
int main(int argc, char** argv) {
if (argc != 6) {
std::cerr << "Usage: " << argv[0]
<< " deploy.prototxt network.caffemodel"
<< " mean.binaryproto testimg" << std::endl;
return 1;
}
//::google::InitGoogleLogging(argv[0]);
string model_file = argv[1];//网络结构文件
string trained_file = argv[2];//模型
string mean_file = argv[3];//均值文件,空着,使用均值
Classifier classifier(model_file, trained_file, mean_file, 104, 117, 123);
string layer_str = argv[4];//要可视化的层,可以同时写多个用逗号隔开
string file = argv[5];//测试图片
stringstream ss(layer_str);
vector<string> layers;
string item;
while (getline(ss, item, ',')) {
string ly = item.c_str();
layers.push_back(ly);
}
cv::Mat img = cv::imread(file, -1);
CHECK(!img.empty()) << "Unable to decode image " << file;
cv::Mat priorboxes=classifier.visualize_priorbox(img,layers);
imshow("priorboxs", priorboxes);
cv::waitKey(0);
}
下面是用 OpenCV 绘制出的、用于检测的 4 个特征层对应的 priorbox(注意:priorbox 的坐标是相对于原图而言的,而不是相对于 feature map)。
从左到右分别是conv3_3,conv4_3,conv5_3,conv6_3
这里的各个 priorbox 因为用的都是 1:1 的比例,所以重叠在一起,看起来就是图中这种小格子,其实实际的框比这些格子更大。下面是我修改后、增加了 2:1 比例的 priorbox 图片:
从左到右分别是conv3_3,conv4_3,conv5_3,conv6_3
如果想看得更清楚,可以只显示其中一个点处的 1:1 的 priorboxes(这里只显示了前三层,因为显示的时候实在不好对齐):