题目简单介绍
官方提供100类狗的图片,选手需要对图片中的狗进行分类。
分析
官方提供的图片如下:
从图片可以看出背景比较复杂:图片中不仅有狗,还有人,也可能有其他物体,因此第一步需要将狗从图片中提取出来。考虑到从头训练一个狗的检测器将耗费一定的时间,因此采用目标检测方法SSD进行狗的检测,将狗检测出来之后再进行分类。
因此流程就是 检测 + 分类。
狗的检测核心代码
该代码使用Qt编写。项目配置如下:
# Header search paths: the SSD Caffe tree (public headers and sources),
# the system include directory, and a CUDA installation.
# NOTE: continuation lines must list plain values only. Repeating "+=" on a
# continued line makes qmake treat the literal "+=" as another include path.
INCLUDEPATH += /home/young/deeplearning/SSD/caffe/include \
               /usr/include \
               /home/young/deeplearning/SSD/caffe/src \
               /home/young/software/cuda/include

# Caffe library from the SSD build tree.
LIBS += -L/home/young/deeplearning/SSD/caffe/build/lib -lcaffe
# OpenCV modules used by the detector (core, imgproc, highgui).
LIBS += -L/usr/lib/x86_64-linux-gnu -lopencv_core -lopencv_imgproc -lopencv_highgui
# Remaining libraries the program links against.
LIBS += -lglog -lgflags -lprotobuf -lboost_system -lboost_thread -latlas

SOURCES += \
    main.cpp

# CUDA runtime, cuBLAS and cuRAND.
INCLUDEPATH += /usr/local/cuda/include
LIBS += -L/usr/local/cuda/lib64 -lcudart -lcublas -lcurand

# cuDNN.
LIBS += -L/home/young/software/cuda/lib64 -lcudnn
SSD检测代码
#include "caffe/caffe.hpp"
#define USE_OPENCV
#define CPU_ONLY
#ifdef USE_OPENCV
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif // USE_OPENCV
#include <algorithm>
#include <iomanip>
#include <iosfwd>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include<fstream>
#ifdef USE_OPENCV
using namespace caffe; // NOLINT(build/namespaces)
/**
 * Splits `src` into the pieces found between occurrences of a separator.
 *
 * The separator may be longer than one character (e.g. ",,"). Empty pieces
 * between two adjacent separators are kept, but an empty piece after the
 * final separator is dropped.
 *
 * @param src                 String to split.
 * @param separate_character  Separator string. If it is empty, no splitting
 *                            is performed: the result holds `src` itself, or
 *                            nothing when `src` is empty as well.
 * @return The pieces of `src`, in order of appearance.
 */
std::vector<std::string> splitEx(const std::string& src, std::string separate_character)
{
    std::vector<std::string> strs;

    // An empty separator matches at every position without consuming any
    // input, so the search below would never advance. Treat it as
    // "nothing to split on" instead of looping forever.
    if (separate_character.empty())
    {
        if (!src.empty())
            strs.push_back(src);
        return strs;
    }

    // Use the string's own size type and npos rather than int / -1, so that
    // positions are never truncated and "not found" is detected portably.
    const std::string::size_type separatorLen = separate_character.size();
    std::string::size_type lastPosition = 0;
    std::string::size_type index = src.find(separate_character, lastPosition);
    while (index != std::string::npos)
    {
        strs.push_back(src.substr(lastPosition, index - lastPosition));
        lastPosition = index + separatorLen;
        index = src.find(separate_character, lastPosition);
    }

    // Whatever follows the last separator; keep it only if it is non-empty.
    const std::string lastString = src.substr(lastPosition);
    if (!lastString.empty())
        strs.push_back(lastString);
    return strs;
}
/**
 * Object detector built around a single Caffe network.
 *
 * The constructor takes the paths of the network definition and the trained
 * weights, plus a mean specification given either as a file or as a value
 * string. Detect() takes one image and returns one vector of floats per
 * result row (the meaning of each float is defined by the implementation,
 * which is not part of this declaration).
 */
class Detector {
 public:
  Detector(const string& model_file, const string& weights_file,
           const string& mean_file, const string& mean_value);

  /// Runs the detector on `img` and returns the raw result rows.
  std::vector<vector<float> > Detect(const cv::Mat& img);

 private:
  /// Sets up `mean_` from `mean_file` / `mean_value`.
  void SetMean(const string& mean_file, const string& mean_value);

  /// Fills `input_channels` with cv::Mat objects for the network input
  /// (presumably wrapping the input blob's memory; see the definition).
  void WrapInputLayer(std::vector<cv::Mat>* input_channels);

  /// Prepares `img` for the network and writes it into `input_channels`.
  void Preprocess(const cv::Mat& img, std::vector<cv::Mat>* input_channels);

  shared_ptr<Net<float> > net_;  ///< The loaded network.
  cv::Size input_geometry_;      ///< Input size (presumably width x height of the input blob).
  int num_channels_;             ///< Channel count of the network's input blob.
  cv::Mat mean_;                 ///< Mean image used by preprocessing.
};
Detector::Detector(const string& model_file,
const string& weights_file,
const string& mean_file,
const string& mean_value) {
#ifdef CPU_ONLY
Caffe::set_mode(Caffe::CPU);
#else
Caffe::set_mode(Caffe::GPU);
#endif
/* Load the network. */
net_.reset(new Net<float>(model_file, TEST));
net_->CopyTrainedLayersFrom(weights_file);
CHECK_EQ(net_->num_inputs(), 1) << "Network should have exactly one input.";
CHECK_EQ(net_->num_outputs(), 1) << "Network should have exactly one output.";
Blob<float>* input_layer = net_->input_blobs()[0];
num_channels_ = input_layer->channels();
CHECK(num_channels_ == 3 || num_channels_ == 1)
<<