1. Convert pt to onnx
$ mkdir Github && cd Github
$ git clone https://github.com/ultralytics/ultralytics
$ cd ultralytics
$ git checkout b9b0fd8bf409c822b7fcb21d65722b242f5307fc
$ pip install -r requirements.txt
1.1 Modify the forward method of class C2f(nn.Module):
def forward(self, x):
# """Forward pass through C2f layer."""
# y = list(self.cv1(x).chunk(2, 1))
# y.extend(m(y[-1]) for m in self.m)
# return self.cv2(torch.cat(y, 1))
# !< https://github.com/FeiGeChuanShu/ncnn-android-yolov8
x = self.cv1(x)
x = [x, x[:, self.c:, ...]]
x.extend(m(x[-1]) for m in self.m)
x.pop(1)
return self.cv2(torch.cat(x, 1))
1.2 Modify the forward method of class Detect(nn.Module):
def forward(self, x):
"""Concatenates and returns predicted bounding boxes and class probabilities."""
shape = x[0].shape # BCHW
for i in range(self.nl):
x[i] = torch.cat((self.cv2[i](x[i]), self.cv3[i](x[i])), 1)
if self.training:
return x
elif self.dynamic or self.shape != shape:
self.anchors, self.strides = (x.transpose(0, 1) for x in make_anchors(x, self.stride, 0.5))
self.shape = shape
x_cat = torch.cat([xi.view(shape[0], self.no, -1) for xi in x], 2)
return x_cat
# if self.export and self.format in ('saved_model', 'pb', 'tflite', 'edgetpu', 'tfjs'): # avoid TF FlexSplitV ops
# box = x_cat[:, :self.reg_max * 4]
# cls = x_cat[:, self.reg_max * 4:]
# else:
# box, cls = x_cat.split((self.reg_max * 4, self.nc), 1)
# dbox = dist2bbox(self.dfl(box), self.anchors.unsqueeze(0), xywh=True, dim=1) * self.strides
# if self.export and self.format in ('tflite', 'edgetpu'):
# # Normalize xywh with image size to mitigate quantization error of TFLite integer models as done in YOLOv5:
# # https://github.com/ultralytics/yolov5/blob/0c8de3fca4a702f8ff5c435e67f378d1fce70243/models/tf.py#L307-L309
# # See this PR for details: https://github.com/ultralytics/ultralytics/pull/1695
# img_h = shape[2] * self.stride[0]
# img_w = shape[3] * self.stride[0]
# img_size = torch.tensor([img_w, img_h, img_w, img_h], device=dbox.device).reshape(1, 4, 1)
# dbox /= img_size
# y = torch.cat((dbox, cls.sigmoid()), 1)
# return y if self.export else (y, x)
1.3 Modify the forward method of class Segment(Detect):
def forward(self, x):
"""Return model outputs and mask coefficients if training, otherwise return outputs and mask coefficients."""
p = self.proto(x[0]) # mask protos
bs = p.shape[0] # batch size
mc = torch.cat([self.cv4[i](x[i]).view(bs, self.nm, -1) for i in range(self.nl)], 2) # mask coefficients
x = self.detect(self, x)
if self.training:
return x, mc, p
# return (torch.cat([x, mc], 1), p) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))
# !< https://github.com/FeiGeChuanShu/ncnn-android-yolov8
return (torch.cat([x, mc], 1).permute(0, 2, 1), p.view(bs, self.nm, -1)) if self.export else (torch.cat([x[0], mc], 1), (x[1], mc, p))
1.4 Create a convert_seg.py file under the ~/Github/ultralytics folder:
from ultralytics import YOLO
# Load model
model = YOLO("/home/tianzx/ai_model/seg/pf_phone_seg/pf_phone_yolov8n_seg.pt")
# Export model
success = model.export(task="segment", format="onnx", opset=12, imgsz=640, simplify=True)
2. Convert ONNX to ncnn
2.1 Build ncnn
$ cd ~/Github/
$ git clone https://github.com/Tencent/ncnn.git
$ cd ncnn
$ git submodule update --init
$ sudo apt install build-essential git cmake libprotobuf-dev protobuf-compiler libvulkan-dev vulkan-utils libopencv-dev
# build part
$ mkdir build && cd build
$ cmake -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=/usr/local/ ..
$ make -j$(nproc)
$ sudo make install
2.2 Convert onnx to ncnn
$ cd /home/tianzx/ai_model/seg/pf_phone_seg/
$ onnx2ncnn pf_phone_yolov8n_seg.onnx pf_phone_yolov8n_seg.param pf_phone_yolov8n_seg.bin
$ ls
pf_phone_yolov8n_seg.bin pf_phone_yolov8n_seg.onnx pf_phone_yolov8n_seg.param pf_phone_yolov8n_seg.pt
3. Test
3.1 Create CMakeLists.txt
and pf_phone_yolov8n_seg.cpp
CMakeLists.txt
$ cd /home/tianzx/ai_model/seg/pf_phone_seg/
$ vim CMakeLists.txt
$ vim pf_phone_yolov8n_seg.cpp
cmake_minimum_required(VERSION 3.5)
project(pf_phone_seg)
set(CMAKE_BUILD_TYPE Release)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++11 -pie -fPIE -fPIC -Wall -O3")
find_package(OpenCV REQUIRED)
if (OpenCV_FOUND)
message(STATUS "OpenCV_LIBS: ${OpenCV_LIBS}")
message(STATUS "OpenCV_INCLUDE_DIRS: ${OpenCV_INCLUDE_DIRS}")
else ()
message(FATAL_ERROR "opencv Not Found!")
endif (OpenCV_FOUND)
find_package(OpenMP REQUIRED)
if (OPENMP_FOUND)
message("OPENMP FOUND")
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} ${OpenMP_EXE_LINKER_FLAGS}")
else ()
message(FATAL_ERROR "OpenMP Not Found!")
endif ()
include_directories(/usr/local/include)
include_directories(/usr/local/include/ncnn)
link_directories(/usr/local/lib)
# Source files
file(GLOB SRC "*.h" "*.cpp")
add_executable(pf_phone_seg ${SRC})
target_link_libraries(pf_phone_seg ncnn ${OpenCV_LIBS})
pf_phone_yolov8n_seg.cpp
#include "ncnn/net.h"
#if defined(USE_NCNN_SIMPLEOCV)
#include "simpleocv.h"
#else
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/imgproc/imgproc.hpp>
#endif
#include <float.h>
#include <stdio.h>
#include <stdlib.h>
// !< add by tianzx 2023.10.20
#include <algorithm> // std::max, std::min, std::copy
#include <cstdint>   // uint32_t (fast_exp)
#include <cstring>   // std::memcpy
#include <iostream>
#include <string>    // std::string, std::stoi
#include <vector>
using namespace std;
using namespace cv;
static void slice(const ncnn::Mat& in, ncnn::Mat& out, int start, int end, int axis)
{
ncnn::Option opt;
opt.num_threads = 4;
opt.use_fp16_storage = false;
opt.use_packing_layout = false;
ncnn::Layer* op = ncnn::create_layer("Crop");
// set param
ncnn::ParamDict pd;
ncnn::Mat axes = ncnn::Mat(1);
axes.fill(axis);
ncnn::Mat ends = ncnn::Mat(1);
ends.fill(end);
ncnn::Mat starts = ncnn::Mat(1);
starts.fill(start);
pd.set(9, starts);// start
pd.set(10, ends);// end
pd.set(11, axes);//axes
op->load_param(pd);
op->create_pipeline(opt);
// forward
op->forward(in, out, opt);
op->destroy_pipeline(opt);
delete op;
}
static void interp(const ncnn::Mat& in, const float& scale, const int& out_w, const int& out_h, ncnn::Mat& out)
{
ncnn::Option opt;
opt.num_threads = 4;
opt.use_fp16_storage = false;
opt.use_packing_layout = false;
ncnn::Layer* op = ncnn::create_layer("Interp");
// set param
ncnn::ParamDict pd;
pd.set(0, 2);// resize_type
pd.set(1, scale);// height_scale
pd.set(2, scale);// width_scale
pd.set(3, out_h);// height
pd.set(4, out_w);// width
op->load_param(pd);
op->create_pipeline(opt);
// forward
op->forward(in, out, opt);
op->destroy_pipeline(opt);
delete op;
}
static void reshape(const ncnn::Mat& in, ncnn::Mat& out, int c, int h, int w, int d)
{
ncnn::Option opt;
opt.num_threads = 4;
opt.use_fp16_storage = false;
opt.use_packing_layout = false;
ncnn::Layer* op = ncnn::create_layer("Reshape");
// set param
ncnn::ParamDict pd;
pd.set(0, w);// start
pd.set(1, h);// end
if (d > 0)
pd.set(11, d);//axes
pd.set(2, c);//axes
op->load_param(pd);
op->create_pipeline(opt);
// forward
op->forward(in, out, opt);
op->destroy_pipeline(opt);
delete op;
}
static void sigmoid(ncnn::Mat& bottom)
{
ncnn::Option opt;
opt.num_threads = 4;
opt.use_fp16_storage = false;
opt.use_packing_layout = false;
ncnn::Layer* op = ncnn::create_layer("Sigmoid");
op->create_pipeline(opt);
// forward
op->forward_inplace(bottom, opt);
op->destroy_pipeline(opt);
delete op;
}
static void matmul(const std::vector<ncnn::Mat>& bottom_blobs, ncnn::Mat& top_blob)
{
ncnn::Option opt;
opt.num_threads = 2;
opt.use_fp16_storage = false;
opt.use_packing_layout = false;
ncnn::Layer* op = ncnn::create_layer("MatMul");
// set param
ncnn::ParamDict pd;
pd.set(0, 0);// axis
op->load_param(pd);
op->create_pipeline(opt);
std::vector<ncnn::Mat> top_blobs(1);
op->forward(bottom_blobs, top_blobs, opt);
top_blob = top_blobs[0];
op->destroy_pipeline(opt);
delete op;
}
struct Object
{
cv::Rect_<float> rect;
int label;
float prob;
cv::Mat mask;
std::vector<float> mask_feat;
};
struct GridAndStride
{
int grid0;
int grid1;
int stride;
};
static inline float intersection_area(const Object& a, const Object& b)
{
cv::Rect_<float> inter = a.rect & b.rect;
return inter.area();
}
static void qsort_descent_inplace(std::vector<Object>& faceobjects, int left, int right)
{
int i = left;
int j = right;
float p = faceobjects[(left + right) / 2].prob;
while (i <= j)
{
while (faceobjects[i].prob > p)
i++;
while (faceobjects[j].prob < p)
j--;
if (i <= j)
{
// swap
std::swap(faceobjects[i], faceobjects[j]);
i++;
j--;
}
}
#pragma omp parallel sections
{
#pragma omp section
{
if (left < j) qsort_descent_inplace(faceobjects, left, j);
}
#pragma omp section
{
if (i < right) qsort_descent_inplace(faceobjects, i, right);
}
}
}
static void qsort_descent_inplace(std::vector<Object>& faceobjects)
{
if (faceobjects.empty())
return;
qsort_descent_inplace(faceobjects, 0, faceobjects.size() - 1);
}
static void nms_sorted_bboxes(const std::vector<Object>& faceobjects, std::vector<int>& picked, float nms_threshold)
{
picked.clear();
const int n = faceobjects.size();
std::vector<float> areas(n);
for (int i = 0; i < n; i++)
{
areas[i] = faceobjects[i].rect.width * faceobjects[i].rect.height;
}
for (int i = 0; i < n; i++)
{
const Object& a = faceobjects[i];
int keep = 1;
for (int j = 0; j < (int)picked.size(); j++)
{
const Object& b = faceobjects[picked[j]];
// intersection over union
float inter_area = intersection_area(a, b);
float union_area = areas[i] + areas[picked[j]] - inter_area;
// float IoU = inter_area / union_area
if (inter_area / union_area > nms_threshold)
keep = 0;
}
if (keep)
picked.push_back(i);
}
}
inline float fast_exp(float x)
{
union {
uint32_t i;
float f;
} v{};
v.i = (1 << 23) * (1.4426950409 * x + 126.93490512f);
return v.f;
}
inline float sigmoid(float x)
{
return 1.0f / (1.0f + fast_exp(-x));
}
static void generate_proposals(std::vector<GridAndStride> grid_strides, const ncnn::Mat& pred, float prob_threshold, std::vector<Object>& objects)
{
const int num_points = grid_strides.size();
const int num_class = 1;
const int reg_max_1 = 16;
for (int i = 0; i < num_points; i++)
{
const float* scores = pred.row(i) + 4 * reg_max_1;
// find label with max score
int label = -1;
float score = -FLT_MAX;
for (int k = 0; k < num_class; k++)
{
float confidence = scores[k];
if (confidence > score)
{
label = k;
score = confidence;
}
}
float box_prob = sigmoid(score);
if (box_prob >= prob_threshold)
{
ncnn::Mat bbox_pred(reg_max_1, 4, (void*)pred.row(i));
{
ncnn::Layer* softmax = ncnn::create_layer("Softmax");
ncnn::ParamDict pd;
pd.set(0, 1); // axis
pd.set(1, 1);
softmax->load_param(pd);
ncnn::Option opt;
opt.num_threads = 1;
opt.use_packing_layout = false;
softmax->create_pipeline(opt);
softmax->forward_inplace(bbox_pred, opt);
softmax->destroy_pipeline(opt);
delete softmax;
}
float pred_ltrb[4];
for (int k = 0; k < 4; k++)
{
float dis = 0.f;
const float* dis_after_sm = bbox_pred.row(k);
for (int l = 0; l < reg_max_1; l++)
{
dis += l * dis_after_sm[l];
}
pred_ltrb[k] = dis * grid_strides[i].stride;
}
float pb_cx = (grid_strides[i].grid0 + 0.5f) * grid_strides[i].stride;
float pb_cy = (grid_strides[i].grid1 + 0.5f) * grid_strides[i].stride;
float x0 = pb_cx - pred_ltrb[0];
float y0 = pb_cy - pred_ltrb[1];
float x1 = pb_cx + pred_ltrb[2];
float y1 = pb_cy + pred_ltrb[3];
Object obj;
obj.rect.x = x0;
obj.rect.y = y0;
obj.rect.width = x1 - x0;
obj.rect.height = y1 - y0;
obj.label = label;
obj.prob = box_prob;
obj.mask_feat.resize(32);
std::copy(pred.row(i) + 64 + num_class, pred.row(i) + 64 + num_class + 32, obj.mask_feat.begin());
objects.push_back(obj);
}
}
}
static void generate_grids_and_stride(const int target_w, const int target_h, std::vector<int>& strides, std::vector<GridAndStride>& grid_strides)
{
for (int i = 0; i < (int)strides.size(); i++)
{
int stride = strides[i];
int num_grid_w = target_w / stride;
int num_grid_h = target_h / stride;
for (int g1 = 0; g1 < num_grid_h; g1++)
{
for (int g0 = 0; g0 < num_grid_w; g0++)
{
GridAndStride gs;
gs.grid0 = g0;
gs.grid1 = g1;
gs.stride = stride;
grid_strides.push_back(gs);
}
}
}
}
static void decode_mask(const ncnn::Mat& mask_feat, const int& img_w, const int& img_h,
const ncnn::Mat& mask_proto, const ncnn::Mat& in_pad, const int& wpad, const int& hpad,
ncnn::Mat& mask_pred_result)
{
ncnn::Mat masks;
matmul(std::vector<ncnn::Mat>{mask_feat, mask_proto}, masks);
sigmoid(masks);
reshape(masks, masks, masks.h, in_pad.h / 4, in_pad.w / 4, 0);
slice(masks, mask_pred_result, (wpad / 2) / 4, (in_pad.w - wpad / 2) / 4, 2);
slice(mask_pred_result, mask_pred_result, (hpad / 2) / 4, (in_pad.h - hpad / 2) / 4, 1);
interp(mask_pred_result, 4.0, img_w, img_h, mask_pred_result);
}
static int detect_yolov8(std::string ncnn_bin_path, std::string ncnn_param_path, int imgsz, const cv::Mat& bgr, std::vector<Object>& objects)
{
ncnn::Net yolov8;
yolov8.load_param(ncnn_param_path.c_str());
yolov8.load_model(ncnn_bin_path.c_str());
int width = bgr.cols;
int height = bgr.rows;
const int target_size = imgsz;
const float prob_threshold = 0.4f;
const float nms_threshold = 0.5f;
// pad to multiple of 32
int w = width;
int h = height;
float scale = 1.f;
if (w > h)
{
scale = (float)target_size / w;
w = target_size;
h = h * scale;
}
else
{
scale = (float)target_size / h;
h = target_size;
w = w * scale;
}
ncnn::Mat in = ncnn::Mat::from_pixels_resize(bgr.data, ncnn::Mat::PIXEL_BGR2RGB, width, height, w, h);
// pad to target_size rectangle
int wpad = (w + 31) / 32 * 32 - w;
int hpad = (h + 31) / 32 * 32 - h;
ncnn::Mat in_pad;
ncnn::copy_make_border(in, in_pad, hpad / 2, hpad - hpad / 2, wpad / 2, wpad - wpad / 2, ncnn::BORDER_CONSTANT, 0.f);
const float norm_vals[3] = { 1 / 255.f, 1 / 255.f, 1 / 255.f };
in_pad.substract_mean_normalize(0, norm_vals);
ncnn::Extractor ex = yolov8.create_extractor();
ex.input("images", in_pad);
ncnn::Mat out;
ex.extract("output0", out);
ncnn::Mat mask_proto;
ex.extract("output1", mask_proto);
std::vector<int> strides = { 8, 16, 32 };
std::vector<GridAndStride> grid_strides;
generate_grids_and_stride(in_pad.w, in_pad.h, strides, grid_strides);
std::vector<Object> proposals;
std::vector<Object> objects8;
generate_proposals(grid_strides, out, prob_threshold, objects8);
proposals.insert(proposals.end(), objects8.begin(), objects8.end());
// sort all proposals by score from highest to lowest
qsort_descent_inplace(proposals);
// apply nms with nms_threshold
std::vector<int> picked;
nms_sorted_bboxes(proposals, picked, nms_threshold);
int count = picked.size();
ncnn::Mat mask_feat = ncnn::Mat(32, count, sizeof(float));
for (int i = 0; i < count; i++) {
float* mask_feat_ptr = mask_feat.row(i);
std::memcpy(mask_feat_ptr, proposals[picked[i]].mask_feat.data(), sizeof(float) * proposals[picked[i]].mask_feat.size());
}
ncnn::Mat mask_pred_result;
decode_mask(mask_feat, width, height, mask_proto, in_pad, wpad, hpad, mask_pred_result);
objects.resize(count);
for (int i = 0; i < count; i++)
{
objects[i] = proposals[picked[i]];
// adjust offset to original unpadded
float x0 = (objects[i].rect.x - (wpad / 2)) / scale;
float y0 = (objects[i].rect.y - (hpad / 2)) / scale;
float x1 = (objects[i].rect.x + objects[i].rect.width - (wpad / 2)) / scale;
float y1 = (objects[i].rect.y + objects[i].rect.height - (hpad / 2)) / scale;
// clip
x0 = std::max(std::min(x0, (float)(width - 1)), 0.f);
y0 = std::max(std::min(y0, (float)(height - 1)), 0.f);
x1 = std::max(std::min(x1, (float)(width - 1)), 0.f);
y1 = std::max(std::min(y1, (float)(height - 1)), 0.f);
objects[i].rect.x = x0;
objects[i].rect.y = y0;
objects[i].rect.width = x1 - x0;
objects[i].rect.height = y1 - y0;
objects[i].mask = cv::Mat::zeros(height, width, CV_32FC1);
cv::Mat mask = cv::Mat(height, width, CV_32FC1, (float*)mask_pred_result.channel(i));
mask(objects[i].rect).copyTo(objects[i].mask(objects[i].rect));
}
return 0;
}
static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)
{
static const char* class_names[] = {
"phone",
};
static const unsigned char colors[2][3] = {
{56, 0, 255},
{226, 255, 0},
};
cv::Mat image = bgr.clone();
int color_index = 0;
for (size_t i = 0; i < objects.size(); i++)
{
const Object& obj = objects[i];
const unsigned char* color = colors[color_index % 1];
color_index++;
cv::Scalar cc(color[0], color[1], color[2]);
fprintf(stderr, "%d = %.5f at %.2f %.2f %.2f x %.2f\n", obj.label, obj.prob,
obj.rect.x, obj.rect.y, obj.rect.width, obj.rect.height);
for (int y = 0; y < image.rows; y++) {
uchar* image_ptr = image.ptr(y);
const float* mask_ptr = obj.mask.ptr<float>(y);
for (int x = 0; x < image.cols; x++) {
if (mask_ptr[x] >= 0.5)
{
image_ptr[0] = cv::saturate_cast<uchar>(image_ptr[0] * 0.5 + color[2] * 0.5);
image_ptr[1] = cv::saturate_cast<uchar>(image_ptr[1] * 0.5 + color[1] * 0.5);
image_ptr[2] = cv::saturate_cast<uchar>(image_ptr[2] * 0.5 + color[0] * 0.5);
}
image_ptr += 3;
}
}
cv::rectangle(image, obj.rect, cc, 2);
char text[256];
sprintf(text, "%s %.1f%%", class_names[obj.label], obj.prob * 100);
int baseLine = 0;
cv::Size label_size = cv::getTextSize(text, cv::FONT_HERSHEY_SIMPLEX, 0.5, 1, &baseLine);
int x = obj.rect.x;
int y = obj.rect.y - label_size.height - baseLine;
if (y < 0)
y = 0;
if (x + label_size.width > image.cols)
x = image.cols - label_size.width;
cv::rectangle(image, cv::Rect(cv::Point(x, y), cv::Size(label_size.width, label_size.height + baseLine)),
cv::Scalar(255, 255, 255), -1);
cv::putText(image, text, cv::Point(x, y + label_size.height),
cv::FONT_HERSHEY_SIMPLEX, 0.5, cv::Scalar(0, 0, 0));
}
cv::imshow("image", image);
cv::imwrite("result.jpg", image);
cv::waitKey(0);
}
int main(int argc, char** argv)
{
if (argc != 5)
{
fprintf(stderr, "Usage: %s [ncnn_bin_path] [ncnn_param_path] [imgsz] [imagepath]\n", argv[0]);
return -1;
}
std::string ncnn_bin_path = argv[1];
std::string ncnn_param_path = argv[2];
std::string imgsz_str = argv[3];
std::string imagepath = argv[4];
int imgsz = std::stoi(imgsz_str);
cv::Mat m = cv::imread(imagepath, 1);
if (m.empty())
{
fprintf(stderr, "cv::imread %s failed\n", imagepath);
return -1;
}
else
{
std::cout << ncnn_bin_path << '\n';
std::cout << ncnn_param_path << '\n';
std::cout << imgsz << '\n';
std::cout << imagepath << '\n';
}
std::vector<Object> objects;
detect_yolov8(ncnn_bin_path, ncnn_param_path, imgsz, m, objects);
draw_objects(m, objects);
return 0;
}
Because pf_phone_yolov8n_seg.pt is a single-class instance-segmentation model, two adjustments were made to the example code:
- change `num_class = 8` to `num_class = 1` in the `static void generate_proposals(std::vector<GridAndStride> grid_strides, const ncnn::Mat& pred, float prob_threshold, std::vector<Object>& objects)` function;
- update the `class_names` and `colors` variables in the `static void draw_objects(const cv::Mat& bgr, const std::vector<Object>& objects)` function.
3.2 Build and Test
$ cd /home/tianzx/ai_model/seg/pf_phone_seg/
$ mkdir build && cd build
$ cmake ..
$ make -j$(nproc)
$ ./pf_phone_seg ../pf_phone_yolov8n_seg.bin ../pf_phone_yolov8n_seg.param 640 /home/tianzx/Pictures/001_1.jpg
../pf_phone_yolov8n_seg.bin
../pf_phone_yolov8n_seg.param
640
/home/tianzx/Pictures/001_1.jpg
0 = 0.96407 at 102.32 91.11 859.72 x 406.88