加速yolov3需要处理三件事情:
1)upsample层;
2)图像的预处理;
3)加速结果的后处理;
贴出upsample层的代码:(参考:)
/******************************/
// Upsample Plugin
/******************************/
//*fuxueping
//2018-11-7
// Plugin layer that enlarges one 3-dimensional (C, H, W) input by the integer
// factor m_scale along both H and W. The actual GPU work is delegated to
// UpsampleForward_gpu(); this class tracks the input shape and handles
// (de)serialization of that shape together with the scale factor.
class UpsamplePlugin: public IPlugin
{
public:
    UpsamplePlugin();

    // Build-time constructor. Only the scale factor is known here; the input
    // shape is filled in later by getOutputDimensions() / configure().
    UpsamplePlugin(size_t scale)
    {
        // m_scale is stored (and serialized) as int, so narrow explicitly.
        m_scale = static_cast<int>(scale);
    }

    // Deserializing constructor. `data` must hold exactly the four ints that
    // serialize() writes, in order: channels, height, width, scale.
    UpsamplePlugin(const void* data, size_t length)
    {
        // Validate the buffer before reading from it, not only afterwards.
        assert(data != nullptr && length == getSerializationSize());
        const char* cursor = reinterpret_cast<const char*>(data);
        const char* const begin = cursor;
        m_input_c = read<int>(cursor);
        m_input_h = read<int>(cursor);
        m_input_w = read<int>(cursor);
        m_scale = read<int>(cursor);
        m_output_count = m_input_c * m_input_w * m_input_h * m_scale * m_scale;
        assert(cursor == begin + length);
        // Silence unused-variable warnings when asserts are compiled out.
        (void)begin;
        (void)length;
    }

    ~UpsamplePlugin();

    // The layer produces a single output tensor.
    int getNbOutputs() const override
    {
        return 1;
    }

    // Returns (C, H * m_scale, W * m_scale) for the single CHW input and
    // records the input shape as a side effect.
    Dims getOutputDimensions(int index, const Dims* inputs, int nbInputDims) override
    {
        // Check the arguments BEFORE touching inputs[0]; the original code
        // dereferenced first and asserted afterwards.
        assert(index == 0 && nbInputDims == 1 && inputs[0].nbDims == 3);
        m_input_c = inputs[0].d[0];
        m_input_h = inputs[0].d[1];
        m_input_w = inputs[0].d[2];
        return DimsCHW(m_input_c, m_input_h * m_scale, m_input_w * m_scale);
    }

    // Records the shape of the first input; the remaining arguments are not used.
    void configure(const Dims* inputDims, int nbInputs, const Dims* outputDims, int nbOutputs, int maxBatchSize) override
    {
        m_input_c = inputDims[0].d[0];
        m_input_h = inputDims[0].d[1];
        m_input_w = inputDims[0].d[2];
    }

    // Nothing to set up; always reports success (0).
    int initialize() override
    {
        return 0;
    }

    // Nothing to release.
    void terminate() override
    {
    }

    // No scratch memory is requested.
    size_t getWorkspaceSize(int maxBatchSize) const override
    {
        return 0;
    }

    // Runs the upsample on inputs[0], writing to outputs[0]. Always returns 0.
    //
    // NOTE(review): neither `batchSize` nor `stream` is forwarded to
    // UpsampleForward_gpu(), and the element count below covers one sample
    // only. This looks correct only for batchSize == 1 with the helper doing
    // its own stream handling - confirm against UpsampleForward_gpu().
    int enqueue(int batchSize, const void*const * inputs, void** outputs, void* workspace, cudaStream_t stream) override
    {
        const int out_c = m_input_c;
        const int out_h = m_input_h * m_scale;
        const int out_w = m_input_w * m_scale;
        // Number of output elements for a single sample.
        const int top_count = m_input_c * m_input_w * m_input_h * m_scale * m_scale;
        UpsampleForward_gpu(out_c, out_h, out_w, m_scale, top_count,
                            static_cast<const float*>(inputs[0]),
                            static_cast<float*>(outputs[0]));
        return 0;
    }

    // Size in bytes of the serialized form: channels, height, width, scale.
    size_t getSerializationSize() override
    {
        return 4 * sizeof(int);
    }

    // Writes channels, height, width and scale (four ints, in that order)
    // into `buffer`, which must provide getSerializationSize() bytes.
    void serialize(void* buffer) override
    {
        char* cursor = reinterpret_cast<char*>(buffer);
        char* const begin = cursor;
        write(cursor, m_input_c);
        write(cursor, m_input_h);
        write(cursor, m_input_w);
        write(cursor, m_scale);
        // Keep the written size in lock-step with getSerializationSize().
        assert(cursor == begin + getSerializationSize());
        (void)begin;
    }

private:
    // Stores `val` at `buffer` and advances `buffer` past it.
    template<typename T> void write(char*& buffer, const T& val)
    {
        *reinterpret_cast<T*>(buffer) = val;
        buffer += sizeof(T);
    }

    // Loads a T from `buffer` and advances `buffer` past it.
    template<typename T> T read(const char*& buffer)
    {
        T val = *reinterpret_cast<const T*>(buffer);
        buffer += sizeof(T);
        return val;
    }

    // Input shape; zero until getOutputDimensions()/configure()/deserialization sets it.
    int m_input_c = 0;
    int m_input_h = 0;
    int m_input_w = 0;
    // Per-sample output element count; only computed by the deserializing constructor.
    int m_output_count = 0;
    // Integer upsampling factor applied to both height and width.
    int m_scale = 1;
    DimsCHW m_dimsBottomData;
};
如何调用:
// "Upsample" layers are served by the custom UpsamplePlugin. Ownership of the
// new plugin ends up in m_vec_upsample_plugin (m_upsample_plugin_ptr is left
// moved-from), and a non-owning pointer to it is returned.
if (layer_type == "Upsample")
{
    const int scale = iter1->second.upsample_param.scale;

    std::unique_ptr<UpsamplePlugin> plugin(new UpsamplePlugin(scale));
    m_upsample_plugin_ptr = std::move(plugin);
    m_vec_upsample_plugin.emplace_back(std::move(m_upsample_plugin_ptr));

    return m_vec_upsample_plugin.back().get();
    // Earlier variant that stored the plugin in a name-keyed map instead:
    // _nvPlugins[layerName] = (plugin::INvPlugin*)(new UpsamplePlugin(scale));
    // return _nvPlugins.at(layerName);
}
图像的预处理和加速结果的后处理参考了这个demo(地址:https://download.csdn.net/download/qq_22764813/10798442)。
还需要用代码将darknet的YOLOv3模型转换为caffe版本,这一步我就帮不了大家了,需要自行处理。如果有疑问或发现错误,欢迎大家留言,谢谢。