依赖库
- opencv
导出onnx
yolo export model=./runs/segment/train2/weights/best.pt imgsz=640 format=onnx opset=12
代码
说明
yolov8会先检测出目标区域,然后再在目标区域内进行关键点检测。我这个模型在一张图中只有一个目标区域需要检测关键点,也即预期只有一个目标检测框。
#include <chrono>
#include <filesystem>
#include <iostream>

#include <opencv.hpp>
/*
YOLOv8 keypoint (pose) inference using the OpenCV DNN module.
*/
// Shorthand alias used below for result-directory existence checks / creation.
namespace fs = std::filesystem;
// Letterbox the input into a square canvas: the image is copied into the
// top-left corner of a black (zero-filled) square whose side length equals
// the larger of the input's width and height. Because no centering offset is
// applied, mapping coordinates back to the original image later needs only a
// single uniform scale factor.
cv::Mat modify_image_size(const cv::Mat& img)
{
const int side = std::max(img.cols, img.rows);
cv::Mat squared = cv::Mat::zeros(side, side, CV_8UC3);
cv::Rect topLeft(0, 0, img.cols, img.rows);
img.copyTo(squared(topLeft));
return squared;
}
bool shouldSkip(int n)
{
static int count = 0;
count++;
if (count == 3)
{
count = 0;
return true;
}
return false;
}
// Entry point: single-image YOLOv8 keypoint (pose) inference.
// Pipeline: load ONNX model -> letterbox image to a square -> resize/normalize
// into a 640x640 blob -> forward pass -> decode candidate rows
// (box + confidence + keypoint triplets) -> for each keypoint slot pick the
// candidate with the highest per-point score -> draw and save the result.
// The model is expected to yield exactly one true detection per image
// (see the notes at the top of this file).
int main()
{
const char* onnx_file{ "./best.onnx" }; // keypoint (pose) model
constexpr int input_size[2]{ 640, 640 }; // network input {height, width}
constexpr double confidence_threshold{ 0.9 }; // box confidence cut-off
constexpr double iou_threshold{ 0.50 }; // iou threshold (only used by the disabled NMS path below)
constexpr double mask_threshold{ 0.50 }; // segment mask threshold (unused in this keypoint-only flow)
const int numPts = 4; // number of keypoints per detection
constexpr bool cuda_enabled{ false };
const std::string testPath("./027.png");
// Extract the bare file name; it is reused to name the saved result image.
std::string filename;
if (std::filesystem::exists(std::filesystem::path(testPath)) == false)
{
std::cout << testPath << " is not exists." << std::endl;
return -1;
}
else
{
filename = std::filesystem::path(testPath).filename().string();
std::cout << "filename = " << filename << std::endl;
}
const std::string result_dir{ "./predictResult" };
auto net = cv::dnn::readNetFromONNX(onnx_file);
if (net.empty())
{
std::cerr << "Error: there are no layers in the network: " << onnx_file << std::endl;
return -1;
}
// Select inference backend/target; the CUDA branch is compiled in but disabled.
if (cuda_enabled) {
net.setPreferableBackend(cv::dnn::DNN_BACKEND_CUDA);
net.setPreferableTarget(cv::dnn::DNN_TARGET_CUDA);
}
else {
net.setPreferableBackend(cv::dnn::DNN_BACKEND_OPENCV);
net.setPreferableTarget(cv::dnn::DNN_TARGET_CPU);
}
std::cout << "read model complete." << std::endl;
if (!fs::exists(result_dir))
{
fs::create_directories(result_dir);
}
#pragma region 预测
cv::Mat frame = cv::imread(testPath, cv::IMREAD_COLOR);
if (frame.empty())
{
std::cerr << "Warning: unable to load image: " << testPath << std::endl;
return -1;
}
// NOTE(review): tstart is never read afterwards — timing is started but never reported.
auto tstart = std::chrono::high_resolution_clock::now();
// Pad to a square (top-left anchored, black fill) so the later resize to
// 640x640 preserves the aspect ratio; see modify_image_size().
cv::Mat bgr = modify_image_size(frame);
std::cout << "bgr.size=" << bgr.size << std::endl;
#if 0
cv::imshow("w", bgr);
cv::waitKey();
#endif
cv::Mat blob;
// 1/255 scaling, resize to (width, height) = (640, 640), BGR->RGB swap, no crop.
cv::dnn::blobFromImage(bgr, blob, 1.0 / 255.0, cv::Size(input_size[1], input_size[0]), cv::Scalar(), true, false);
net.setInput(blob);
std::vector<cv::Mat> outputs;
net.forward(outputs, net.getUnconnectedOutLayersNames());
std::cout << "outputs.size = " << outputs.size() << std::endl;
cv::Mat result = outputs[0];
std::cout << result.size << std::endl;
#pragma endregion
// NOTE(review): class_ids and keyPoints are declared but never used.
std::vector<int> class_ids;
std::vector<cv::Rect> boxes;
std::vector<cv::Point> keyPoints;
// The letterbox pads only to the right/bottom (no centering offset), so one
// uniform factor maps 640-space coordinates back to the original image.
float scalex = frame.cols * 1.f / input_size[1]; // note: image_preprocess function
float scaley = frame.rows * 1.f / input_size[0];
auto scale = (scalex > scaley) ? scalex : scaley;
// Raw output is [1 x C x N]; transpose so each row is one candidate with
// C columns = 4 box values (cx, cy, w, h) + 1 confidence + 3 per keypoint
// (x, y, score). The .t() evaluates into fresh storage, so data0.data is valid.
cv::Mat data0 = cv::Mat(outputs[0].size[1], outputs[0].size[2], CV_32FC1, outputs[0].data).t();
std::cout << "data0.size=" << data0.size << std::endl;
std::cout << "data0.cols=" << data0.cols << ", data0.rows=" << data0.rows << std::endl;
const float* data = (float*)data0.data;
std::vector<float> confidences;
// Per kept candidate: flattened (x, y, score) triplets, already rescaled to
// original-image coordinates.
std::vector<std::vector<float>> keypts;
for (auto i = 0; i < data0.rows; ++i)
{// iterate over every candidate row (box)
if (*(data + 4) > confidence_threshold) // column 4 = box confidence
{
confidences.emplace_back(*(data + 4));
//keypts.emplace_back(std::vector<float>(data + 5, data + data0.cols));
float x = data[0]; // box center x (640-space)
float y = data[1]; // box center y
float w = data[2]; // box width
float h = data[3]; // box height
#if 1
// Convert center/size to a top-left rect in original-image coordinates, clamped to the frame.
int left = std::max(0, std::min(int((x - 0.5 * w) * scale), frame.cols));
int top = std::max(0, std::min(int((y - 0.5 * h) * scale), frame.rows));
int width = std::max(0, std::min(int(w * scale), frame.cols - left));
int height = std::max(0, std::min(int(h * scale), frame.rows - top));
#endif
boxes.emplace_back(cv::Rect(left, top, width, height));
std::vector<float> rowPtsInfo;
// Columns 5.. hold the keypoint triplets (x, y, score); rescale and clamp each point.
for (int ipt = 5; ipt < data0.cols; ipt+=3)
{
std::cout << "ipt=" << ipt << std::endl;
// NOTE(review): these x/y shadow the box-center x/y declared above.
float x = *(data + ipt);
x = std::max(0, std::min(int(std::round(x * scale)), frame.cols));
float y = *(data + ipt + 1);
y = std::max(0, std::min(int(std::round(y * scale)), frame.rows));
float score = *(data + ipt + 2);
rowPtsInfo.push_back(x);
rowPtsInfo.push_back(y);
rowPtsInfo.push_back(score);
}
keypts.emplace_back(rowPtsInfo);
// Debug dump of the raw 17 columns (4 box + 1 conf + 4*3 keypoints) of this row.
for (int j = 0; j < 17; ++j)
{
std::cout << *(data + j) << " ";
}
std::cout << std::endl;
}
data += data0.cols; // advance to the next candidate row
}
#if 0
// Method 1 (disabled): NMS the boxes and take the keypoints of the surviving
// highest-scoring boxes. May be less accurate than method 2 below.
#pragma region 方法1是取box最高得分对应的关键点,可能不是很准确
std::vector<int> nms_result;// indices of the boxes kept by NMS
cv::dnn::NMSBoxes(boxes, confidences, confidence_threshold, iou_threshold, nms_result);
#if 0
for (const auto item : nms_result)
{
std::cout << item << " ";
}
std::cout << std::endl;
#endif
const cv::Size shape_src(frame.cols, frame.rows), // original image size
shape_input(input_size[1], input_size[0]); // 640
for (size_t i = 0; i < nms_result.size(); ++i)
{
auto index = nms_result[i];// index of a box kept by NMS
cv::Rect box = boxes[index];
#if 1
cv::rectangle(frame, box, cv::Scalar(0, 0, 255));
cv::imshow("w", frame);
cv::waitKey();
#endif
std::vector<float> pointsInfo = keypts[index];
for (int i = 0; i < pointsInfo.size(); i += 3)
{
cv::Point loc(pointsInfo[i], pointsInfo[i+1]);
cv::circle(frame, loc, 2, cv::Scalar(0, 0, 255), -1);
}
}
cv::imshow("w", frame);
cv::waitKey();
#pragma endregion
#endif
#if 1
// Method 2: across ALL kept candidates, pick for each keypoint slot the
// candidate whose per-point score is highest.
#pragma region 方法2,遍历所有关键点,取每个位置关键点得分最高的
// NOTE(review): if no candidate passed the confidence threshold, keypts is
// empty and keypts[0] below is undefined behavior — consider guarding.
cv::Mat ptsMat(keypts.size(), 3 * numPts, CV_32F); // one row per candidate, columns = (x, y, score) * numPts
std::cout << ptsMat.rows << ",,," << ptsMat.cols << std::endl;
std::cout << keypts[0].size() << std::endl;
for (int row = 0; row < ptsMat.rows; ++row)
{
for (int col = 0; col < ptsMat.cols; ++col)
{
ptsMat.at<float>(row, col) = keypts[row][col];
}
}
std::cout << ptsMat << std::endl;
// NOTE(review): taPts is never used.
std::vector<cv::Point> taPts;
// Score columns are 2, 5, 8, ...; for each, find the candidate row with the
// maximum score and draw that row's (x, y) for this keypoint slot.
for (int findCol = 2; findCol < ptsMat.cols; findCol += 3)
{
double minVal, maxVal;
cv::Point minLoc, maxLoc;
cv::minMaxLoc(ptsMat.col(findCol), &minVal, &maxVal, &minLoc, &maxLoc);
int findRow = maxLoc.y; // candidate row holding the best score for this keypoint
cv::Point taLoc(findCol, findRow); // NOTE(review): unused
float x = ptsMat.at<float>(findRow, findCol - 2);
float y = ptsMat.at<float>(findRow, findCol - 1);
cv::circle(frame, cv::Point(x,y), 2, cv::Scalar(0, 0, 255), -1);
}
cv::imshow("w", frame);
cv::waitKey();
// NOTE(review): "\\" is Windows-only; prefer '/' or std::filesystem::path for portability.
cv::imwrite(result_dir + "\\" + filename, frame);
/*for (int irow = 0; irow < keypts.size(); ++irow)
{
std::vector<float> pointsInfo = keypts[irow];
for (int i = 0; i < pointsInfo.size(); i += 3)
{
cv::Point loc(pointsInfo[i], pointsInfo[i + 1]);
cv::circle(frame, loc, 2, cv::Scalar(0, 0, 255), -1);
cv::imshow("w", frame);
cv::waitKey();
}
}*/
#pragma endregion
#endif
return 0;
}