最近在做项目,用到了YOLOV5的分类模型,项目需求是将模型使用RKNN C++ API 接口实现。因为之前用C++实现过检测模型,刚开始便想当然地以为和YOLO的检测模型大差不差,甚至会更简单,就是因为这种心理踩了很多坑才实现项目需求。唉,怎么就没有人早点提醒我,让我少走些弯路呢?
目录
在进入正文之前,如果你不了解如何用C++去调用RKNN的C API接口实现代码推理,你有必要先阅读一下我的这篇文章:RKNN C++ 推理流程
简单讲一下分类模型和检测模型的区别:
(1)前处理方式不同:检测模型是通过LetterBox的方式处理数据,而分类模型的前处理采用的是CenterCrop的方式。
(2)归一化参数不同:检测模型归一化过程中均值是0,标准差是255。而分类模型归一化过程中均值是[0.485, 0.456, 0.406],标准差是[0.229, 0.224, 0.225],或者均值取 [0.485 * 255, 0.456 * 255, 0.406 * 255]、标准差取 [0.229 * 255, 0.224 * 255, 0.225 * 255],两种方式取决于送入归一化之前是否进行了 Input / 255 的操作。
废话不多说,直接上代码:
1. 数据前处理函数
cv::Mat centerCrop(const cv::Mat& image, cv::Size cropSize) {
    // Classification-style CenterCrop preprocessing: take the largest
    // centered square from the image, then resize it to cropSize.
    const int side = std::min(image.rows, image.cols);
    const int top  = (image.rows - side) / 2;
    const int left = (image.cols - side) / 2;

    // Extract the square ROI and scale it to the requested output size.
    const cv::Rect roi(left, top, side, side);
    cv::Mat result;
    cv::resize(image(roi), result, cropSize, 0, 0, cv::INTER_LINEAR);
    return result;
}
cv::Mat transfer(const cv::Mat& image) {
    // Convert BGR (OpenCV's default channel order) to RGB and scale
    // pixel values into [0, 1] as float32.
    cv::Mat out;
    cv::cvtColor(image, out, cv::COLOR_BGR2RGB);
    out.convertTo(out, CV_32F, 1.0 / 255);
    // The buffer is handed to the inference API as a raw pointer, so it
    // must be contiguous in memory; clone if OpenCV gave us a view.
    return out.isContinuous() ? out : out.clone();
}
2. 其他辅助函数
// Print one tensor attribute (index, name, shape, quantization info...)
// as a single human-readable line.
static void dump_tensor_attr(rknn_tensor_attr *attr)
{
    // Join the dims array into a comma-separated list, e.g. "1, 3, 224, 224".
    std::string shape_str;
    for (uint32_t i = 0; i < attr->n_dims; ++i)
    {
        if (i > 0)
        {
            shape_str += ", ";
        }
        shape_str += std::to_string(attr->dims[i]);
    }
    printf("  index=%d, name=%s, n_dims=%d, dims=[%s], n_elems=%d, size=%d, w_stride = %d, size_with_stride=%d, fmt=%s, "
           "type=%s, qnt_type=%s, "
           "zp=%d, scale=%f\n",
           attr->index, attr->name, attr->n_dims, shape_str.c_str(), attr->n_elems, attr->size, attr->w_stride,
           attr->size_with_stride, get_format_string(attr->fmt), get_type_string(attr->type),
           get_qnt_type_string(attr->qnt_type), attr->zp, attr->scale);
}
// Read the entire model file into a freshly malloc'd buffer.
// On success returns the buffer (caller owns it and must free()) and
// stores the byte count into *model_size; on any failure returns nullptr.
static unsigned char *load_model(const char *filename, int *model_size)
{
    FILE *fp = fopen(filename, "rb");
    if (fp == nullptr)
    {
        printf("fopen %s fail!", filename);
        return nullptr;
    }
    // Determine file size by seeking to the end.
    fseek(fp, 0, SEEK_END);
    long model_len = ftell(fp);
    if (model_len <= 0) // ftell returns -1 on error; empty model is useless too
    {
        printf("ftell %s fail!", filename);
        fclose(fp);
        return nullptr;
    }
    unsigned char *model = (unsigned char *)malloc(model_len);
    if (model == nullptr) // was unchecked: malloc can fail for large models
    {
        printf("malloc %ld bytes fail!", model_len);
        fclose(fp);
        return nullptr;
    }
    fseek(fp, 0, SEEK_SET);
    if ((size_t)model_len != fread(model, 1, model_len, fp))
    {
        printf("fread %s fail!", filename);
        free(model);
        fclose(fp); // was leaked on this error path
        return nullptr;
    }
    fclose(fp);
    *model_size = (int)model_len;
    return model;
}
3. 主函数
int main(int argc, char **argv)
{
const char *model_file = argv[1];
const char *img_file = argv[2];
rknn_context ctx;
int model_len = 0; // 模型文件大小
auto model = load_model(model_file, &model_len); // 加载模型文件
if (model == nullptr)
{
printf("load model file %s fail!", model_file);
return -1;
}
// 初始化rknn模型
int ret = rknn_init(&ctx, model, model_len, 0, NULL); // 初始化rknn context
if (ret < 0)
{
printf("rknn_init fail! ret=%d", ret);
return -1;
}
// 获取rknn版本信息
rknn_sdk_version version;
ret = rknn_query(ctx, RKNN_QUERY_SDK_VERSION, &version, sizeof(rknn_sdk_version));
if (ret < 0)
{
printf("rknn_init error ret=%d\n", ret);
return -1;
}
printf("sdk version: %s driver version: %s\n", version.api_version, version.drv_version);
// 获取rknn输入输出个数
rknn_input_output_num io_num;
ret = rknn_query(ctx, RKNN_QUERY_IN_OUT_NUM, &io_num, sizeof(io_num));
if (ret < 0)
{
printf("rknn_init error ret=%d\n", ret);
return -1;
}
printf("model input num: %d, output num: %d\n", io_num.n_input, io_num.n_output);
// 获取rknn输入属性
rknn_tensor_attr input_attrs[io_num.n_input];
memset(input_attrs, 0, sizeof(input_attrs));
for (int i = 0; i < io_num.n_input; i++)
{
input_attrs[i].index = i;
ret = rknn_query(ctx, RKNN_QUERY_INPUT_ATTR, &(input_attrs[i]), sizeof(rknn_tensor_attr));
if (ret < 0)
{
printf("rknn_init error ret=%d\n", ret);
return -1;
}
dump_tensor_attr(&(input_attrs[i]));
}
// 获取rknn输出属性
rknn_tensor_attr output_attrs[io_num.n_output];
memset(output_attrs, 0, sizeof(output_attrs));
for (int i = 0; i < io_num.n_output; i++)
{
output_attrs[i].index = i;
ret = rknn_query(ctx, RKNN_QUERY_OUTPUT_ATTR, &(output_attrs[i]), sizeof(rknn_tensor_attr));
dump_tensor_attr(&(output_attrs[i]));
}
int channel = 3;
int width = 0;
int height = 0;
if (input_attrs[0].fmt == RKNN_TENSOR_NCHW)
{
printf("model is NCHW input fmt\n");
channel = input_attrs[0].dims[1];
height = input_attrs[0].dims[2];
width = input_attrs[0].dims[3];
}
else
{
printf("model is NHWC input fmt\n");
height = input_attrs[0].dims[1];
width = input_attrs[0].dims[2];
channel = input_attrs[0].dims[3];
}
printf("model input height=%d, width=%d, channel=%d\n", height, width, channel);
// 读取图片
cv::Mat orig_img = cv::imread(img_file, 1);
if (!orig_img.data)
{
printf("cv::imread %s fail!\n", img_file);
return -1;
}
cv::Size crop_size(224, 224);
cv::Mat cropped_image = centerCrop(orig_img, crop_size);
cv::Mat transferred_image = transfer(cropped_image);
printf("transferred_image width = %d, transferred_image height = %d\n", transferred_image.cols, transferred_image.rows);
rknn_input inputs[1];
memset(inputs, 0, sizeof(inputs));
inputs[0].index = 0;
inputs[0].type = RKNN_TENSOR_FLOAT32;
inputs[0].size = transferred_image.total() * transferred_image.elemSize();
inputs[0].fmt = RKNN_TENSOR_NHWC;
inputs[0].pass_through = false;
inputs[0].buf = transferred_image.data;
rknn_inputs_set(ctx, io_num.n_input, inputs);
rknn_output outputs[io_num.n_output];
memset(outputs, 0, sizeof(outputs));
for (int i = 0; i < io_num.n_output; i++)
{
outputs[i].index = i;
outputs[i].want_float = true;
}
// 执行推理
ret = rknn_run(ctx, NULL);
ret = rknn_outputs_get(ctx, io_num.n_output, outputs, NULL);
size_t num_elements = outputs->size / 4; // 输出元素的数量
std::cout << outputs->size << std::endl;
std::vector<float> output_vector(num_elements);
std::memcpy(output_vector.data(), outputs->buf, outputs->size);
// 打印前几个元素作为示例
for (size_t i = 0; i < std::min(10ul, num_elements); ++i) {
std::cout << "Output[" << i << "]: " << output_vector[i] << std::endl;
}
std::vector<float> probs = softmax(output_vector);
}
上述三份代码组合起来就是完整的推理代码了!实际运行时命令行传入模型地址和图片地址即可,没有啥难度。