使用OnnxRuntime推理Yolov9

朱永博

已于 2024-06-19 17:03:30 修改

阅读量313

点赞数 1

文章标签： c++ YOLO opencv

于 2024-04-18 15:53:04 首次发布

本文链接：https://blog.csdn.net/qq_49595983/article/details/137924670

版权

Python

import cv2
import numpy as np
import onnxruntime as ort
def readClassesNames(file_path):
    """Read a class-names file and return one label per line.

    Args:
        file_path: Path to a UTF-8 text file.

    Returns:
        A list of strings, one per line in file order, each with leading and
        trailing whitespace removed. Blank lines are kept as empty strings so
        that list positions still match line numbers.
    """
    with open(file_path, encoding='utf-8') as names_file:
        return [raw_line.strip() for raw_line in names_file]
# --- Inputs: class labels and the test image --------------------------------
classes_names = 'coco.names'
classes = readClassesNames(classes_names)
image = cv2.imread('bus.jpg')
if image is None:
    # cv2.imread reports failure by returning None rather than raising.
    raise FileNotFoundError("Could not read image 'bus.jpg'")
image_height, image_width = image.shape[:2]

# --- Model session -----------------------------------------------------------
model_path = 'yolov9-c.onnx'
# The timer starts before the session is created, so the reported FPS
# includes model loading time.
start_time = cv2.getTickCount()
session = ort.InferenceSession(model_path, providers=['CPUExecutionProvider'])
# NOTE(review): the C++ sample in this post uses 0.25 (score) / 0.45 (NMS);
# confirm these two values are not swapped.
conf_thresold = 0.45
iou_threshold = 0.25
model_inputs = session.get_inputs()
input_names = [node.name for node in model_inputs]
input_shape = model_inputs[0].shape
model_output = session.get_outputs()
output_names = [node.name for node in model_output]
# Assumes a fixed-size NCHW input (e.g. 1x3x640x640) -- TODO confirm for
# models exported with dynamic axes.
input_height, input_width = input_shape[2:]

# --- Pre-processing: BGR -> RGB, resize, scale to [0, 1], HWC -> NCHW --------
image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
resized = cv2.resize(image_rgb, (input_width, input_height))
input_image = resized / 255.0
input_image = input_image.transpose(2,0,1)
input_tensor = input_image[np.newaxis, :, :, :].astype(np.float32)

# --- Inference and decoding ---------------------------------------------------
outputs = session.run(output_names, {input_names[0]: input_tensor})[0]
# After the transpose each row is one candidate: the first 4 columns are the
# box and the remaining columns are per-class scores.
predictions = np.squeeze(outputs).T
scores = np.max(predictions[:, 4:], axis=1)
keep = scores > conf_thresold
predictions = predictions[keep, :]
scores = scores[keep]
class_ids = np.argmax(predictions[:, 4:], axis=1)
boxes = predictions[:, :4]
# Rescale boxes from network-input pixels to original-image pixels.
input_shape = np.array([input_width, input_height, input_width, input_height])
boxes = np.divide(boxes, input_shape, dtype=np.float32)
boxes *= np.array([image_width, image_height, image_width, image_height])
boxes = boxes.astype(np.int32)

# --- Non-maximum suppression --------------------------------------------------
# `boxes` is centre-based (cx, cy, w, h) -- it is converted with xywh2xyxy when
# drawn -- but cv2.dnn.NMSBoxes expects top-left based (x, y, w, h). Build a
# top-left copy for NMS only and leave `boxes` itself untouched.
nms_boxes = boxes.copy()
nms_boxes[:, 0] -= boxes[:, 2] // 2
nms_boxes[:, 1] -= boxes[:, 3] // 2
indices = cv2.dnn.NMSBoxes(nms_boxes.tolist(), scores.tolist(),
                           score_threshold=conf_thresold, nms_threshold=iou_threshold)
# Depending on the OpenCV version the result is an empty tuple, an (N, 1)
# array or a flat array; normalise to a flat integer index array so that
# boxes[indices] always has shape (N, 4).
indices = np.asarray(indices, dtype=np.int64).reshape(-1)
detections = []
def xywh2xyxy(x):
    """Convert centre-based boxes to corner-based boxes.

    Args:
        x: numpy array whose last axis holds (cx, cy, w, h).

    Returns:
        A new array with the same shape and dtype as ``x`` whose last axis
        holds (x1, y1, x2, y2). The input is not modified. For integer
        dtypes the half-width/half-height offsets are truncated when they are
        stored into the result.
    """
    centre_x, centre_y = x[..., 0], x[..., 1]
    half_w, half_h = x[..., 2] / 2, x[..., 3] / 2
    corners = np.copy(x)
    corners[..., 0] = centre_x - half_w
    corners[..., 1] = centre_y - half_h
    corners[..., 2] = centre_x + half_w
    corners[..., 3] = centre_y + half_h
    return corners
# Draw every detection selected by NMS: red outline, filled yellow label strip
# above the box, and the class name on top of the strip.
kept_corner_boxes = xywh2xyxy(boxes[indices])
kept_scores = scores[indices]
kept_class_ids = class_ids[indices]
for corner_box, _kept_score, class_index in zip(kept_corner_boxes, kept_scores, kept_class_ids):
    pts = corner_box.round().astype(np.int32).tolist()
    top_left = tuple(pts[:2])
    bottom_right = tuple(pts[2:])
    cls = classes[int(class_index)]
    cv2.rectangle(image, top_left, bottom_right, (0,0,255), 2, 8)
    cv2.rectangle(image, (pts[0], (pts[1]-20)), (pts[2], pts[1]), (0,255,255), -1)
    cv2.putText(image, f'{cls}', (pts[0], pts[1] - 5),
                cv2.FONT_HERSHEY_PLAIN,2, [225, 0, 0], thickness=2)

# Elapsed wall time since start_time, reported as frames per second.
elapsed_seconds = (cv2.getTickCount() - start_time)/cv2.getTickFrequency()
fps = 1/elapsed_seconds
print(f"EStimated FPS: {fps:.2f}")
cv2.putText(image, 'FPS: {:.2f}'.format(fps), (20, 40), cv2.FONT_HERSHEY_PLAIN, 2, [225, 0, 0], 2, 8)

# Show the annotated image and block until a key is pressed.
cv2.imshow("YOLOV9-ONNXRUNTIME", image)
cv2.waitKey(0)

using OpenCvSharp;
using OpenCvSharp.Dnn;
using Microsoft.ML.OnnxRuntime;
using Microsoft.ML.OnnxRuntime.Tensors;
// Logistic function 1 / (1 + e^-a). The exponential is evaluated in double
// precision by Math.Exp and narrowed to float before the division.
static float sigmoid(float a) => 1.0f / (1.0f + (float)Math.Exp(-a));
// Reads a text file and returns its lines in order, one array element per line.
//   path: file to read.
// Returns every line of the file (empty lines included) as a string array.
// Exceptions thrown by StreamReader (e.g. file not found) propagate to the caller.
string[] read_class_names(string path)
{
    List<string> names = new List<string>();
    // `using` closes the file handle even if reading throws; the reader was
    // previously never disposed.
    using (StreamReader sr = new StreamReader(path))
    {
        string line;
        while ((line = sr.ReadLine()) != null)
        {
            names.Add(line);
        }
    }
    return names.ToArray();
}
// ---- Configuration -----------------------------------------------------------
string model_path = "yolov9-c.onnx";
string image_path = "bus.jpg";
float conf_threshold = 0.25f;    // score threshold handed to NMS
float nms_threshold = 0.4f;      // overlap threshold handed to NMS
float scores_threshold = 0.25f;  // minimum best-class score to keep a candidate

// ---- Load inputs -------------------------------------------------------------
Mat image = Cv2.ImRead(image_path);
if (image.Empty())
{
    throw new FileNotFoundException("Could not read image", image_path);
}
string[] classes_names = read_class_names("coco.names");

// Pad the image to a square (content in the top-left corner) so the resize
// below does not distort the aspect ratio.
int max_image_length = image.Cols > image.Rows ? image.Cols : image.Rows;
Mat max_image = Mat.Zeros(new OpenCvSharp.Size(max_image_length, max_image_length), MatType.CV_8UC3);
Rect roi = new Rect(0, 0, image.Cols, image.Rows);
image.CopyTo(new Mat(max_image, roi));

// ---- Create the session and read tensor shapes -------------------------------
SessionOptions options = new SessionOptions();
options.LogSeverityLevel = OrtLoggingLevel.ORT_LOGGING_LEVEL_INFO;
options.AppendExecutionProvider_CPU(0);
InferenceSession onnx_session = new InferenceSession(model_path, options);
string input_name = "images";  // fallback; replaced by the name from the model metadata
int[] inputshape = { };
int[] outputshape = { };
var inputmetadata = onnx_session.InputMetadata;
var outputmetadata = onnx_session.OutputMetadata;
foreach (var item in inputmetadata)
{
    input_name = item.Key;
    inputshape = item.Value.Dimensions;
}
foreach (var item in outputmetadata)
{
    outputshape = item.Value.Dimensions;
}

// ---- Pre-process: BGR -> RGB, resize, scale to [0, 1], HWC -> NCHW ------------
Mat image_rgb = new Mat();
Cv2.CvtColor(max_image, image_rgb, ColorConversionCodes.BGR2RGB);
Mat resize_image = new Mat();
// Size takes (width, height); the input tensor is NCHW, so width is
// dimension 3 and height is dimension 2.
Cv2.Resize(image_rgb, resize_image, new OpenCvSharp.Size(inputshape[3], inputshape[2]));
long start = Cv2.GetTickCount();
Tensor<float> input_tensor = new DenseTensor<float>(new[] { inputshape[0], inputshape[1], inputshape[2], inputshape[3] });
for (int y = 0; y < resize_image.Height; y++)
{
    for (int x = 0; x < resize_image.Width; x++)
    {
        // Read the pixel once instead of once per channel.
        Vec3b pixel = resize_image.At<Vec3b>(y, x);
        input_tensor[0, 0, y, x] = pixel[0] / 255f;
        input_tensor[0, 1, y, x] = pixel[1] / 255f;
        input_tensor[0, 2, y, x] = pixel[2] / 255f;
    }
}

// ---- Inference -----------------------------------------------------------------
List<NamedOnnxValue> input_ontainer = new List<NamedOnnxValue>();
input_ontainer.Add(NamedOnnxValue.CreateFromTensor(input_name, input_tensor));
float[] result_array;
// The result collection owns native buffers; copy the first output into a
// managed array and dispose the collection before the session is disposed.
using (IDisposableReadOnlyCollection<DisposableNamedOnnxValue> result_infer = onnx_session.Run(input_ontainer))
{
    DisposableNamedOnnxValue[] results_onnxvalue = result_infer.ToArray();
    Tensor<float> result_tensors = results_onnxvalue[0].AsTensor<float>();
    result_array = result_tensors.ToArray();
}
onnx_session.Dispose();
options.Dispose();
resize_image.Dispose();
image_rgb.Dispose();

// ---- Post-process ----------------------------------------------------------------
// View the output as outputshape[1] x outputshape[2] and transpose it so that
// each row is one candidate: 4 box values followed by the class scores.
Mat result_data = new Mat(outputshape[1], outputshape[2], MatType.CV_32F, result_array);
result_data = result_data.T();
// Scale from network-input pixels back to padded-image pixels (square, so one
// factor serves both axes).
float[] factors = new float[2];
factors[0] = factors[1] = (float)(max_image_length / (float)inputshape[3]);
List<Rect> position_boxes = new List<Rect>();
List<int> class_ids = new List<int>();
List<float> confidences = new List<float>();
for (int i = 0; i < result_data.Rows; i++)
{
    // Class scores occupy columns 4 .. outputshape[1]-1 (84 for the 80-class
    // model), taken from the model instead of being hard-coded.
    Mat classes_scores = result_data.Row(i).ColRange(4, outputshape[1]);
    Point max_classId_point, min_classId_point;
    double max_score, min_score;
    Cv2.MinMaxLoc(classes_scores, out min_score, out max_score,
        out min_classId_point, out max_classId_point);
    if (max_score > scores_threshold)
    {
        float cx = result_data.At<float>(i, 0);
        float cy = result_data.At<float>(i, 1);
        float ow = result_data.At<float>(i, 2);
        float oh = result_data.At<float>(i, 3);
        // Centre/size -> top-left/size, rescaled to image pixels.
        Rect box = new Rect();
        box.X = (int)((cx - 0.5 * ow) * factors[0]);
        box.Y = (int)((cy - 0.5 * oh) * factors[1]);
        box.Width = (int)(ow * factors[0]);
        box.Height = (int)(oh * factors[1]);
        position_boxes.Add(box);
        class_ids.Add(max_classId_point.X);
        confidences.Add((float)max_score);
    }
}

// ---- NMS and drawing -----------------------------------------------------------------
CvDnn.NMSBoxes(position_boxes, confidences, conf_threshold, nms_threshold, out int[] indexes);
for (int i = 0; i < indexes.Length; i++)
{
    int index = indexes[i];
    Rect box = position_boxes[index];
    Cv2.Rectangle(image, box, new Scalar(0, 0, 255), 2, LineTypes.Link8);
    // Filled strip above the box that serves as the label background.
    Cv2.Rectangle(image, new Point(box.TopLeft.X, box.TopLeft.Y - 20),
        new Point(box.BottomRight.X, box.TopLeft.Y), new Scalar(0, 255, 255), -1);
    Cv2.PutText(image, classes_names[class_ids[index]], new Point(box.X, box.Y - 5),
        HersheyFonts.HersheyPlain, 2, new Scalar(255, 0, 0), 2);
}

// ---- FPS overlay and display ---------------------------------------------------------
float t = ((float)(Cv2.GetTickCount() - start)) / ((float)Cv2.GetTickFrequency());
Cv2.PutText(image, string.Concat("FPS:", (1 / t).ToString("0.00")), new Point(20, 40), HersheyFonts.HersheyPlain, 2, new Scalar(255, 0, 0), 2);
Cv2.ImShow("YOLOV9-ONNXRUNTIME", image);
Cv2.WaitKey(0);

C++

#include <fstream>
#include <opencv2/opencv.hpp>
#include <onnxruntime_cxx_api.h>
using namespace std;
using namespace cv;
using namespace Ort;
// Reads class labels from a text file, one label per line.
//   filename: path of the file to read.
// Returns the non-empty lines in file order. If the file cannot be opened an
// error is written to stderr and an empty vector is returned.
vector<string> readClassNames(const string& filename) {
    vector<string> names;
    ifstream input(filename);
    if (input.is_open()) {
        // Empty lines are skipped; the stream is closed by its destructor.
        for (string entry; getline(input, entry);) {
            if (entry.empty()) {
                continue;
            }
            names.push_back(entry);
        }
    }
    else {
        cerr << "Error opening file: " << filename << endl;
    }
    return names;
}
int main(int argc, char** argv) 
{
    string filename = "coco.names";
    vector<string> labels = readClassNames(filename);
    Mat image = imread("bus.jpg");
    int ih = image.rows;
    int iw = image.cols;
    string onnxpath = "yolov9-c.onnx";
    wstring modelPath = wstring(onnxpath.begin(), onnxpath.end());
    SessionOptions session_options;
    Env env = Env(ORT_LOGGING_LEVEL_ERROR, "yolov9-c");
    session_options.SetGraphOptimizationLevel(ORT_ENABLE_BASIC);
    Session session_(env, modelPath.c_str(), session_options);
    vector<string> input_node_names;
    vector<string> output_node_names;
    size_t numInputNodes = session_.GetInputCount();
    size_t numOutputNodes = session_.GetOutputCount();
    AllocatorWithDefaultOptions allocator;
    input_node_names.reserve(numInputNodes);
    // 获取输入信息
    int input_w = 0;
    int input_h = 0;
    for (int i = 0; i < numInputNodes; i++) {
        auto input_name = session_.GetInputNameAllocated(i, allocator);
        input_node_names.push_back(input_name.get());
        TypeInfo input_type_info = session_.GetInputTypeInfo(i);
        auto input_tensor_info = input_type_info.GetTensorTypeAndShapeInfo();
        auto input_dims = input_tensor_info.GetShape();
        input_w = input_dims[3];
        input_h = input_dims[2];
        cout << "input format: NxCxHxW = " << input_dims[0] << "x" << input_dims[1] << "x" << input_dims[2] << "x" << input_dims[3] << endl;
    }
    // 获取输出信息
    int output_h = 0;
    int output_w = 0;
    TypeInfo output_type_info = session_.GetOutputTypeInfo(0);
    auto output_tensor_info = output_type_info.GetTensorTypeAndShapeInfo();
    auto output_dims = output_tensor_info.GetShape();
    output_h = output_dims[1]; // 84
    output_w = output_dims[2]; // 8400
    cout << "output format : HxW = " << output_dims[1] << "x" << output_dims[2] << endl;
    for (int i = 0; i < numOutputNodes; i++) {
        auto out_name = session_.GetOutputNameAllocated(i, allocator);
        output_node_names.push_back(out_name.get());
    }
    cout << "input: " << input_node_names[0] << " output: " << output_node_names[0] << endl;
    // format frame
    int64 start = getTickCount();
    int w = image.cols;
    int h = image.rows;
    int _max = max(h, w);
    Mat image_ = Mat::zeros(Size(_max, _max), CV_8UC3);
    Rect roi(0, 0, w, h);
    image.copyTo(image_(roi));
    // fix bug, boxes consistence!
    float x_factor = image_.cols / static_cast<float>(input_w);
    float y_factor = image_.rows / static_cast<float>(input_h);
    Mat blob = dnn::blobFromImage(image_, 1 / 255.0, Size(input_w, input_h), Scalar(0, 0, 0), true, false);
    size_t tpixels = input_h * input_w * 3;
    array<int64_t, 4> input_shape_info{ 1, 3, input_h, input_w };
    auto allocator_info = MemoryInfo::CreateCpu(OrtDeviceAllocator, OrtMemTypeCPU);
    Value input_tensor_ = Value::CreateTensor<float>(allocator_info, blob.ptr<float>(), tpixels, input_shape_info.data(), input_shape_info.size());
    const array<const char*, 1> inputNames = { input_node_names[0].c_str() };
    const array<const char*, 1> outNames = { output_node_names[0].c_str() };
    vector<Value> ort_outputs;
    try {
        ort_outputs = session_.Run(RunOptions{ nullptr }, inputNames.data(), &input_tensor_, 1, outNames.data(), outNames.size());
    }
    catch (exception e) {
        cout << e.what() << endl;
    }
    // output data
    const float* pdata = ort_outputs[0].GetTensorMutableData<float>();
    Mat dout(output_h, output_w, CV_32F, (float*)pdata);
    Mat det_output = dout.t(); 
    session_options.release();
    session_.release();
    // post-process
    vector<Rect> boxes;
    vector<int> classIds;
    vector<float> confidences;
    for (int i = 0; i < det_output.rows; i++) {
        Mat classes_scores = det_output.row(i).colRange(4, 84);
        Point classIdPoint;
        double score;
        minMaxLoc(classes_scores, 0, &score, 0, &classIdPoint);
        if (score > 0.25)
        {
            float cx = det_output.at<float>(i, 0);
            float cy = det_output.at<float>(i, 1);
            float ow = det_output.at<float>(i, 2);
            float oh = det_output.at<float>(i, 3);
            int x = static_cast<int>((cx - 0.5 * ow) * x_factor);
            int y = static_cast<int>((cy - 0.5 * oh) * y_factor);
            int width = static_cast<int>(ow * x_factor);
            int height = static_cast<int>(oh * y_factor);
            Rect box;
            box.x = x;
            box.y = y;
            box.width = width;
            box.height = height;
            boxes.push_back(box);
            classIds.push_back(classIdPoint.x);
            confidences.push_back(score);
        }
    }
    // NMS
    vector<int> indexes;
    dnn::NMSBoxes(boxes, confidences, 0.25, 0.45, indexes);
    for (size_t i = 0; i < indexes.size(); i++) {
        int index = indexes[i];
        int idx = classIds[index];
        rectangle(image, boxes[index], Scalar(0, 0, 255), 2, 8);
        rectangle(image, Point(boxes[index].tl().x, boxes[index].tl().y - 20),
                  Point(boxes[index].br().x, boxes[index].tl().y), Scalar(0, 255, 255), -1);
        putText(image, labels[idx], Point(boxes[index].tl().x, boxes[index].tl().y), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 0), 2, 8);
    }
    //FPS render it
    float t = (getTickCount() - start) / static_cast<float>(getTickFrequency());
    putText(image, format("FPS: %.2f", 1/t), Point(20, 40), FONT_HERSHEY_PLAIN, 2.0, Scalar(255, 0, 0), 2, 8);
    imshow("YOLOV9-ONNXRUNTIME", image);
    waitKey(0);
    return 0;
}

CMakeLists.txt

# Build script for the YOLOv9 + ONNX Runtime C++ sample (main.cpp).
# The cxx_std_14 compile feature needs CMake 3.8 or newer.
cmake_minimum_required(VERSION 3.8...3.27)
project(Yolov9 LANGUAGES CXX)

# Dependency locations. These are cache entries so they can be overridden at
# configure time with -DOpenCV_DIR=... / -DONNXRUNTIME_DIR=...; the defaults
# are the paths used originally.
set(OpenCV_DIR "E:/Opencv/opencv_vs/build"
    CACHE PATH "Directory containing the OpenCV CMake package configuration")
set(ONNXRUNTIME_DIR "E:/Onnxruntime/cpu/1.15"
    CACHE PATH "ONNX Runtime root directory (contains include/ and lib/)")

# Must be set before add_executable: targets copy these values on creation.
set(CMAKE_CXX_STANDARD 14)
set(CMAKE_CXX_STANDARD_REQUIRED ON)

find_package(OpenCV REQUIRED)

# Locate the ONNX Runtime library (onnxruntime.lib on Windows) and fail early
# with a clear message instead of at link time.
find_library(ONNXRUNTIME_LIBRARY
  NAMES onnxruntime
  HINTS "${ONNXRUNTIME_DIR}/lib"
)
if(NOT ONNXRUNTIME_LIBRARY)
  message(FATAL_ERROR
    "onnxruntime library not found under '${ONNXRUNTIME_DIR}/lib'; "
    "set ONNXRUNTIME_DIR to the ONNX Runtime root directory")
endif()

add_executable(Yolov9 main.cpp)
target_compile_features(Yolov9 PRIVATE cxx_std_14)
target_include_directories(Yolov9 PRIVATE
  "${ONNXRUNTIME_DIR}/include"
  ${OpenCV_INCLUDE_DIRS}
)
target_link_libraries(Yolov9 PRIVATE
  "${ONNXRUNTIME_LIBRARY}"
  ${OpenCV_LIBS}
)