OpenVINO Inference Notes:
A running log of problems hit while doing inference with OpenVINO. Pointers and corrections are very welcome. Many thanks!
OpenVINO detection inference (YOLOv5)
The Python code and results are given below.
import openvino.runtime as ov
import cv2
import numpy as np
import torch, torchvision
import time
from common.utils import cv_imread  # project helper: read an image from a (possibly non-ASCII) path
def clip_coords(boxes, shape):
    # Clip xyxy bounding boxes to image shape (height, width)
if isinstance(boxes, torch.Tensor): # faster individually
boxes[:, 0].clamp_(0, shape[1]) # x1
boxes[:, 1].clamp_(0, shape[0]) # y1
boxes[:, 2].clamp_(0, shape[1]) # x2
boxes[:, 3].clamp_(0, shape[0]) # y2
else: # np.array (faster grouped)
boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2
boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
# Rescale coords (xyxy) from img1_shape to img0_shape
if ratio_pad is None: # calculate from img0_shape
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
else:
gain = ratio_pad[0][0]
pad = ratio_pad[1]
coords[:, [0, 2]] -= pad[0] # x padding
coords[:, [1, 3]] -= pad[1] # y padding
coords[:, :4] /= gain
clip_coords(coords, img0_shape)
return coords
def box_iou(box1, box2):
# https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
"""
Return intersection-over-union (Jaccard index) of boxes.
Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
Arguments:
box1 (Tensor[N, 4])
box2 (Tensor[M, 4])
Returns:
iou (Tensor[N, M]): the NxM matrix containing the pairwise
IoU values for every element in boxes1 and boxes2
"""
def box_area(box):
# box = 4xn
return (box[2] - box[0]) * (box[3] - box[1])
area1 = box_area(box1.T)
area2 = box_area(box2.T)
# inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter)
def xywh2xyxy(x):
# Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
return y
def non_max_suppression(prediction, conf_thres=0.05, iou_thres=0.01, classes=None, agnostic=False, multi_label=False,
labels=(), max_det=300):
"""Runs Non-Maximum Suppression (NMS) on inference results
Returns:
list of detections, on (n,6) tensor per image [xyxy, conf, cls]
"""
nc = prediction.shape[2] - 5 # number of classes
xc = prediction[..., 4] > conf_thres # candidates
# Checks
assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
# Settings
min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height
max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
time_limit = 10.0 # seconds to quit after
redundant = True # require redundant detections
multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
merge = False # use merge-NMS
t = time.time()
output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
for xi, x in enumerate(prediction): # image index, image inference
# Apply constraints
x = x[xc[xi]] # confidence
        # Concatenate a priori labels if autolabelling
if labels and len(labels[xi]):
l = labels[xi]
v = torch.zeros((len(l), nc + 5), device=x.device)
v[:, :4] = l[:, 1:5] # box
v[:, 4] = 1.0 # conf
v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls
x = torch.cat((x, v), 0)
# If none remain process next image
if not x.shape[0]:
continue
# Compute conf
x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
# Box (center x, center y, width, height) to (x1, y1, x2, y2)
box = xywh2xyxy(x[:, :4])
# Detections matrix nx6 (xyxy, conf, cls)
if multi_label:
i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
else: # best class only
conf, j = x[:, 5:].max(1, keepdim=True)
x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
# Filter by class
if classes is not None:
x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
# Check shape
n = x.shape[0] # number of boxes
if not n: # no boxes
continue
elif n > max_nms: # excess boxes
x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
# Batched NMS
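        # Boxes are offset by class_index * max_wh so boxes of different classes never
        # overlap; a single class-agnostic NMS call then behaves like per-class NMS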
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
if i.shape[0] > max_det: # limit detections
i = i[:max_det]
if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
# update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
weights = iou * scores[None] # box weights
x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
if redundant:
i = i[iou.sum(1) > 1] # require redundancy
output[xi] = x[i]
if (time.time() - t) > time_limit:
print(f'WARNING: NMS time limit {time_limit}s exceeded')
break # time limit exceeded
return output
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
# Resize and pad image while meeting stride-multiple constraints
shape = im.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup: # only scale down, do not scale up (for better val mAP)
r = min(r, 1.0)
# Compute padding
ratio = r, r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if auto: # minimum rectangle
dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
elif scaleFill: # stretch
dw, dh = 0.0, 0.0
new_unpad = (new_shape[1], new_shape[0])
ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
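    # The -0.1/+0.1 below biases round() so that top+bottom (left+right) always sums to the
    # full padding even when dh (dw) ends in .5, e.g. dh = 3.5 -> top 3, bottom 4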
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
return im, ratio, (dw, dh)
class YoloV5InferOpenvino:
    def __init__(self, xml_path, device_id=0, input_size=(640, 640), score=0.45, iou_thres=0.45,
                 pad_color=(114, 114, 114)):
super(YoloV5InferOpenvino, self).__init__()
self.input_size = input_size
self.pad_color = pad_color
self.score = score
        self.iou_thres = iou_thres
        # 1. Create the OpenVINO Core
core = ov.Core()
devices = core.available_devices
for device in devices:
device_name = core.get_property(device, "FULL_DEVICE_NAME")
print(f"{device}: {device_name}")
model_path = xml_path
model = core.read_model(model_path)
self.compiled_model = core.compile_model(model, "CPU")
self.infer_request = self.compiled_model.create_infer_request()
print(self.infer_request)
input_tensor1 = self.infer_request.get_input_tensor(0)
# assert input_tensor1.data.dtype == np.int64
        # The input tensor can also be fetched by name; this network has a single input named "images",
        # so both methods refer to the same underlying tensor
        input_tensor2 = self.infer_request.get_tensor("images")
def _preprocess(self, bgr_img):
img_size = self.input_size
color = self.pad_color
img_copy, ratio, (dw, dh) = letterbox(bgr_img, img_size, auto=False)
img = np.array(img_copy, np.float32)
img = img / 255.
# Convert
img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
img = np.ascontiguousarray(img)
        x = np.expand_dims(img, 0)  # add batch dimension
return x
def infer(self, img0):
# img0 = cv_imread(img_path)
x_input = self._preprocess(img0)
        input_names = self.compiled_model.inputs[0].names
        output_names = self.compiled_model.outputs[0].names
        print("input names =", input_names, "output names =", output_names)
        self.infer_request.infer(inputs={"images": x_input})
        output_tensor = self.infer_request.get_output_tensor(0).data
        print(output_tensor.shape)
        pred_out = torch.tensor(output_tensor)
        pred = non_max_suppression(pred_out, conf_thres=self.score, iou_thres=self.iou_thres, labels=[],
                                   multi_label=True, agnostic=False)
for i, det in enumerate(pred): # per image
if len(det):
det[:, :4] = scale_coords(x_input.shape[2:], det[:, :4], img0.shape).round()
det_numpy = det.numpy()
return det_numpy
else:
return []
if __name__ == "__main__":
CLASSES = ["a", "b", "c"]
color = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0)]
weight_path = "./model/yolov5.xml"
model = YoloV5InferOpenvino(weight_path, input_size=[640, 640], score=0.1)
img_path = "./img/test.jpg"
img0 = cv_imread(img_path)
det_numpy = model.infer(img0)
if len(det_numpy) > 0:
for i in range(det_numpy.shape[0]):
bbox = det_numpy[i]
xmin = int(bbox[0])
ymin = int(bbox[1])
xmax = int(bbox[2])
ymax = int(bbox[3])
score = float(bbox[4])
idx = int(bbox[5])
            img0 = cv2.rectangle(img0, (xmin, ymin), (xmax, ymax), color[idx], 2)  # draw box outline
img0 = cv2.putText(img0, CLASSES[idx] + " " + str(score)[:6], (xmin, ymin), 1, 2, color[idx],
2)
cv2.imshow("name", img0)
cv2.waitKey(0)
C++ code and results (OpenVINO Runtime API 2.0)
#pragma once
#include <opencv2/dnn.hpp>
#include <openvino/openvino.hpp>
#include <opencv2/opencv.hpp>
using namespace std;
std::vector<cv::Scalar> colors_map = { cv::Scalar(0, 0, 255) , cv::Scalar(0, 255, 0) , cv::Scalar(255, 0, 0) ,
cv::Scalar(255, 255, 0) , cv::Scalar(0, 255, 255) , cv::Scalar(255, 0, 255) };
const std::vector<std::string> class_names_Hawkeye = { "a", "b", "c" };
cv::Mat letterbox(cv::Mat& img, std::vector<float>& paddings, std::vector<int> new_shape = { 640, 640 })
{
int img_h = img.rows;
int img_w = img.cols;
// Compute scale ratio(new / old) and target resized shape
float scale = std::min(new_shape[1] * 1.0 / img_h, new_shape[0] * 1.0 / img_w);
int resize_h = int(round(img_h * scale));
int resize_w = int(round(img_w * scale));
paddings[0] = scale;
// Compute padding
int pad_h = new_shape[1] - resize_h;
int pad_w = new_shape[0] - resize_w;
// Resize and pad image while meeting stride-multiple constraints
cv::Mat resized_img;
cv::resize(img, resized_img, cv::Size(resize_w, resize_h));
// divide padding into 2 sides
float half_h = pad_h * 1.0 / 2;
float half_w = pad_w * 1.0 / 2;
paddings[1] = half_h;
paddings[2] = half_w;
// Compute padding boarder
int top = int(round(half_h - 0.1));
int bottom = int(round(half_h + 0.1));
int left = int(round(half_w - 0.1));
int right = int(round(half_w + 0.1));
// Add border
    cv::copyMakeBorder(resized_img, resized_img, top, bottom, left, right, cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));
return resized_img;
}
void OV_Det(string& xml_path, string& img_path) {
// Get OpenVINO runtime version
std::cout << ov::get_openvino_version().description << ':' << ov::get_openvino_version().buildNumber << std::endl;
// Step 1. Initialize OpenVINO Runtime Core
ov::Core core;
// Step 2. Get list of available devices
std::vector<std::string> availableDevices = core.get_available_devices();
// Step 3. Query and print supported metrics and config keys
if (availableDevices.empty()) {
std::cout << "No devices available." << std::endl;
}
else
{
std::cout << "Available devices: " << std::endl;
for (auto&& device : availableDevices) {
std::cout << device << std::endl;
}
}
    // Step 4. Read network structure and weights
// std::shared_ptr<ov::Model> model = core.read_model(xml_path);
// ov::CompiledModel compiled_model = core.compile_model(model, "CPU");
auto compiled_model = core.compile_model(xml_path, "CPU");
const auto inputShape = compiled_model.input().get_shape();
std::unordered_set<std::string> inputNames = compiled_model.input().get_names();
std::cout << "Input Tensor: " << *inputNames.begin() << " Shape: " << inputShape << std::endl;
std::vector<ov::Output<const ov::Node>> outputInfos = compiled_model.outputs();
for (auto& outputInfo : outputInfos)
{
std::unordered_set<std::string> outputNames = outputInfo.get_names();
const auto outputShape = outputInfo.get_shape();
std::cout << "Output Tensor: " << *outputNames.begin() << " Shape: " << outputShape << std::endl;
}
cv::Mat srcMat = cv::imread(img_path);
std::vector<float> paddings(3); //scale, half_h, half_w
std::vector<int> reshape = { 640, 640 };
cv::Mat resized_img = letterbox(srcMat, paddings, reshape);
// BGR->RGB, u8(0-255)->f32(0.0-1.0), HWC->NCHW
cv::Mat input_data = cv::dnn::blobFromImage(resized_img, 1 / 255.0, cv::Size(640, 640), cv::Scalar(0, 0, 0), true);
// Step 5. Create tensor from image
ov::Tensor input_tensor = ov::Tensor(compiled_model.input().get_element_type(), compiled_model.input().get_shape(), input_data.ptr(0));
// Step 6. Create an infer request for model inference
ov::InferRequest infer_request = compiled_model.create_infer_request();
infer_request.set_input_tensor(input_tensor);
infer_request.infer();
//Step 7. Retrieve inference results
const ov::Tensor& output_tensor = infer_request.get_tensor("output");
//const ov::Tensor& output_tensor = infer_request.get_output_tensor(1);
ov::Shape output_shape = output_tensor.get_shape();
std::cout << "Output Shape: " << output_shape << std::endl;
// Detect Matrix: 25200 x 8
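    // For a 640x640 input, 25200 = 3 anchors x (80*80 + 40*40 + 20*20) grid cells at strides 8/16/32;
    // 8 columns = 4 box coords (cx, cy, w, h) + 1 objectness + 3 class scores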
cv::Mat detect_buffer(output_shape[1], output_shape[2], CV_32F, output_tensor.data());
float conf_threshold = 0.25;
float nms_threshold = 0.5;
std::vector<cv::Rect> boxes;
std::vector<int> class_ids;
std::vector<float> class_scores;
std::vector<float> confidences;
    // layout per row: cx, cy, w, h, confidence, c1, c2, c3 (3 classes here)
float scale = paddings[0];
for (int i = 0; i < detect_buffer.rows; i++) {
float confidence = detect_buffer.at<float>(i, 4);
if (confidence < conf_threshold) {
continue;
}
cv::Mat classes_scores = detect_buffer.row(i).colRange(5, 8);
cv::Point class_id;
double score;
cv::minMaxLoc(classes_scores, NULL, &score, NULL, &class_id);
// class score: 0~1
if (score > 0.25)
{
float cx = detect_buffer.at<float>(i, 0);
float cy = detect_buffer.at<float>(i, 1);
float w = detect_buffer.at<float>(i, 2);
float h = detect_buffer.at<float>(i, 3);
int left = static_cast<int>((cx - 0.5 * w - paddings[2]) / scale);
int top = static_cast<int>((cy - 0.5 * h - paddings[1]) / scale);
int width = static_cast<int>(w / scale);
int height = static_cast<int>(h / scale);
cv::Rect box;
box.x = left;
box.y = top;
box.width = width;
box.height = height;
boxes.push_back(box);
class_ids.push_back(class_id.x);
class_scores.push_back(score);
confidences.push_back(confidence);
}
}
// NMS
std::vector<int> indices;
cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, nms_threshold, indices);
// -------- Step 8. Visualize the detection results -----------
for (size_t i = 0; i < indices.size(); i++) {
int index = indices[i];
int class_id = class_ids[index];
cv::rectangle(srcMat, boxes[index], colors_map[class_id % 6], 2, 8);
        std::string label = class_names_Hawkeye[class_id] + ":" + std::to_string(class_scores[index]);
cv::putText(srcMat, label, cv::Point(boxes[index].tl().x, boxes[index].tl().y - 10), cv::FONT_HERSHEY_SIMPLEX, .5, colors_map[class_id % 6]);
}
cv::namedWindow("YOLOv5 OpenVINO Inference C++ Demo");
cv::imshow("YOLOv5 OpenVINO Inference C++ Demo", srcMat);
cv::waitKey(0);
cv::destroyAllWindows();
}
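OV_Det above is just a function; for reference, a minimal driver appended to the same file might look like the sketch below. The model and image paths are placeholders mirroring the Python demo.
int main() {
    std::string xml_path = "./model/yolov5.xml"; // placeholder model path
    std::string img_path = "./img/test.jpg";     // placeholder test image
    OV_Det(xml_path, img_path);
    return 0;
}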
C++ code and results (Inference Engine API)
void inference_Det(string& xml_path, string& bin_path, string& img_path) {
// Get OpenVINO runtime version
std::cout << ov::get_openvino_version().description << ':' << ov::get_openvino_version().buildNumber << std::endl;
// Step 1. Initialize OpenVINO Runtime Core
InferenceEngine::Core core;
// Step 2. Get list of available devices
std::vector<std::string> availableDevices = core.GetAvailableDevices();
// Step 3. Query and print supported metrics and config keys
if (availableDevices.empty()) {
std::cout << "No devices available." << std::endl;
}
else
{
std::cout << "Available devices: " << std::endl;
for (auto&& device : availableDevices) {
std::cout << device << std::endl;
}
}
    // Step 4. Read network structure and weights
InferenceEngine::CNNNetwork model = core.ReadNetwork(xml_path, bin_path);
    // Get input names and tensor info
InferenceEngine::InputsDataMap inputsInfo = model.getInputsInfo();
for (auto& inputInfo : inputsInfo) {
const std::string& inputName = inputInfo.first;
//std::cout << "Input Name: " << inputName << std::endl;
const InferenceEngine::DataPtr& inputData = inputInfo.second->getInputData();
const InferenceEngine::TensorDesc& inputDesc = inputData->getTensorDesc();
        // Get the tensor dimensions
const InferenceEngine::SizeVector& inputDims = inputDesc.getDims();
std::cout << "Input Tensor '" << inputName << "' Size: ";
for (size_t dim : inputDims) {
std::cout << dim << " ";
}
std::cout << std::endl;
}
    // Grab the first input (method one)
    auto item = inputsInfo.begin();
    const std::string& image_info_name = item->first; // name of the image input
    // Get output names and tensor info
InferenceEngine::OutputsDataMap outputsInfo = model.getOutputsInfo();
for (auto& outputInfo : outputsInfo) {
const std::string& outputName = outputInfo.first;
//std::cout << "Output Name: " << outputName << std::endl;
const InferenceEngine::DataPtr& outputData = outputInfo.second;
const InferenceEngine::TensorDesc& outputDesc = outputData->getTensorDesc();
        // Get the tensor dimensions
const InferenceEngine::SizeVector& outputDims = outputDesc.getDims();
std::cout << "Output Tensor '" << outputName << "' Size: ";
for (size_t dim : outputDims) {
std::cout << dim << " ";
}
std::cout << std::endl;
}
    // Step 5. Compile the model
InferenceEngine::ExecutableNetwork executableNetwork = core.LoadNetwork(model, "CPU");
    // Create the infer request
InferenceEngine::InferRequest infer_request = executableNetwork.CreateInferRequest();
cv::Mat srcMat = cv::imread(img_path);
    /* record the original image height and width */
auto original_height = srcMat.rows;
auto original_width = srcMat.cols;
cv::Size resizeShape(640, 640);
cv::Mat matNormImage;
    preDataDet(srcMat, resizeShape, matNormImage); // user helper: subtract mean, divide by std
    // Load the image into the model's input tensor
InferenceEngine::Blob::Ptr input_blob = infer_request.GetBlob(image_info_name);
const InferenceEngine::SizeVector input_dims = input_blob->getTensorDesc().getDims();
const size_t channels = input_dims[1];
const size_t height = input_dims[2];
const size_t width = input_dims[3];
    // Copy the image data into the input blob
InferenceEngine::LockedMemory<void> blobMapped = InferenceEngine::as<InferenceEngine::MemoryBlob>(input_blob)->wmap();
float* blob_data = blobMapped.as<float*>();
for (size_t h = 0; h < height; ++h) {
for (size_t w = 0; w < width; ++w) {
for (size_t c = 0; c < channels; ++c) {
blob_data[c * height * width + h * width + w] = matNormImage.at<float>(h, w * channels + c);
//blob_data[c * height * width + h * width + w] = matNormImage.at<cv::Vec<float, 3>>(h, w)[c];
}
}
}
infer_request.Infer();
    int img_w = 8;      // detection matrix columns: 4 box coords + 1 objectness + 3 class scores
    int img_h = 25200;  // detection matrix rows (number of predictions)
    // Retrieve inference results
    const float* output_buffer = nullptr; // will point into the output blob, so no allocation is needed
for (auto& item : outputsInfo) {
auto output_name = item.first;
if (output_name == "output")
{
auto output = infer_request.GetBlob(output_name);
auto const memLocker = output->cbuffer(); // use const memory locker
// output_buffer is valid as long as the lifetime of memLocker
output_buffer = memLocker.as<const float*>();
cv::Mat probImg = cv::Mat(cv::Size(img_w, img_h), CV_32FC1);
            // single image
memcpy(probImg.data, (const float*)output_buffer, (size_t)img_w * img_h * sizeof(float));
//std::cout << probImg.rows << std::endl;
std::vector<int> candi;
for (size_t y = 0; y < probImg.rows; ++y)
{
float tmpVal = probImg.at<float>(y, 4);
//std::cout << y << " " << tmpVal << std::endl;
if (tmpVal > 0.7)
{
std::cout << y << " " << tmpVal << std::endl;
candi.push_back(y);
}
}
//Detections matrix nx6 (xyxy, conf, cls)
std::vector<TDetectBbox> all_box;
for (size_t g = 0; g < candi.size(); ++g)
{
                float* ptr = probImg.ptr<float>(candi[g]); // pointer to the start of this candidate row
TDetectBbox candi_single;
std::vector<float> score;
                for (size_t m = 0; m < 3; ++m) // 3 classes: collect per-class scores
{
float _temVal = ptr[m + 5];
score.push_back(_temVal);
}
                // find the max score and its index
                float _score{ 0.0 };
                int classIndx = findVectorMax(score, _score); // best class and its score
                candi_single.x0 = ptr[0] - ptr[2] / 2; // convert xywh (center, size) to xyxy (top-left, bottom-right)
candi_single.y0 = ptr[1] - ptr[3] / 2;
candi_single.x1 = ptr[0] + ptr[2] / 2;
candi_single.y1 = ptr[1] + ptr[3] / 2;
                candi_single.confidence = ptr[4] * _score; // objectness * class score
candi_single.classIndx = classIndx;
all_box.push_back(candi_single);
}
            // run NMS
nms(all_box, 0.4);
            // compute the coordinate mapping back to the original image
int shift_x{ 0 }, shift_y{ 0 };
float scale_x{ .0f }, scale_y{ .0f };
            // a plain resize is assumed here (no letterbox padding to undo)
scale_x = (float)width / original_width;
scale_y = (float)height / original_height;
for (size_t i = 0; i < all_box.size(); i++)
{
if (all_box[i].confidence > 0.7)
{
int x1 = Max(0, (int)(all_box[i].x0 - shift_x) / scale_x);
int y1 = Max(0, (int)(all_box[i].y0 - shift_y) / scale_y);
int x2 = Min(original_width, (int)(all_box[i].x1 - shift_x) / scale_x);
int y2 = Min(original_height, (int)(all_box[i].y1 - shift_y) / scale_y);
cv::rectangle(srcMat, cv::Rect(x1, y1, x2 - x1, y2 - y1), cv::Scalar(255, 255, 255));
std::cout << "confidence: " << all_box[i].confidence << std::endl;
}
}
cv::imshow("Image", srcMat);
cv::waitKey(0);
}
}
}
OpenVINO semantic segmentation inference (ENet)
Python code and results
import openvino.runtime as ov
import cv2
import numpy as np
import matplotlib.pyplot as plt
if __name__ == '__main__':
    # 1. Create the OpenVINO Core
core = ov.Core()
devices = core.available_devices
for device in devices:
device_name = core.get_property(device, "FULL_DEVICE_NAME")
print(f"{device}: {device_name}")
    # 1b. Load custom ops if needed; OpenVINO's extension API is the recommended way to write custom operators
    # (usually not needed)
    # core.add_extension("path_to_extension_library.so")
    # 2. Read the model from disk
model_path = "./model/ENet.xml"
model = core.read_model(model_path)
    # 3. Compile and load the model onto the device
    # Steps 1-2 only read the model file; it must be compiled before it can run
compiled_model = core.compile_model(model, "CPU")
print(compiled_model)
for i in range(len(compiled_model.inputs)):
print(compiled_model.inputs[i].names, compiled_model.inputs[i].shape)
for i in range(len(compiled_model.outputs)):
print(compiled_model.outputs[i].names, compiled_model.outputs[i].shape)
print("-------------------------------------")
    # 4. Create the infer request
infer_request = compiled_model.create_infer_request()
    # 5. Fill in input data
    # 5.1 Get the input tensor by index
input_tensor1 = infer_request.get_input_tensor(0)
# assert input_tensor1.data.dtype == np.int64
    # 5.2 The tensor can also be fetched by name; this network has a single input, named "images",
    # so both calls refer to the same underlying tensor (the Python wrappers are distinct objects, though)
    input_tensor2 = infer_request.get_tensor("images")
    assert input_tensor2 is not input_tensor1  # distinct wrapper objects around the same tensor
print("-------------------------------------")
    # 6. Read the image and preprocess: scale to [0, 1], HWC -> CHW, float32
image_path = "./image/test.jpg"
    images = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_COLOR)  # raw file bytes must be read as uint8, not float32
images = cv2.resize(images, (256, 256), interpolation=cv2.INTER_LINEAR)
images = images / 255.
images = images.transpose((2, 0, 1))
images = np.array(images, np.float32)
    x = np.expand_dims(images, 0)  # add batch dimension
print("-------------------------------------")
res = infer_request.infer(inputs={"images": x})
for key in res.keys():
print(res[key].shape)
    # 6.2 Second way to fetch results: get_output_tensor()
output_tensor = infer_request.get_output_tensor(1)
print(output_tensor.data.shape)
plt.imshow(output_tensor.data[0], "gray")
plt.colorbar(label='max value test')
plt.show()
print("hello world")
C++ code and results (OpenVINO Runtime API 2.0)
#pragma once
#include <opencv2/dnn.hpp>
#include <openvino/openvino.hpp>
#include <opencv2/opencv.hpp>
using namespace std;
void OV_Seg(string& xml_path, string& img_path) {
// Get OpenVINO runtime version
std::cout << ov::get_openvino_version().description << ':' << ov::get_openvino_version().buildNumber << std::endl;
// Step 1. Initialize OpenVINO Runtime Core
ov::Core core;
// Step 2. Get list of available devices
std::vector<std::string> availableDevices = core.get_available_devices();
// Step 3. Query and print supported metrics and config keys
if (availableDevices.empty()) {
std::cout << "No devices available." << std::endl;
}
else
{
std::cout << "Available devices: " << std::endl;
for (auto&& device : availableDevices) {
std::cout << device << std::endl;
}
}
    // Step 4. Read network structure and weights
// std::shared_ptr<ov::Model> model = core.read_model(xml_path);
// ov::CompiledModel compiled_model = core.compile_model(model, "CPU");
auto compiled_model = core.compile_model(xml_path, "CPU");
ov::InferRequest infer_request = compiled_model.create_infer_request();
const auto inputShape = compiled_model.input().get_shape();
std::unordered_set<std::string> inputNames = compiled_model.input().get_names();
std::cout << "Input Tensor: " << *inputNames.begin() << " Shape: " << inputShape << std::endl;
std::vector<ov::Output<const ov::Node>> outputInfos = compiled_model.outputs();
for (auto& outputInfo : outputInfos)
{
std::unordered_set<std::string> outputNames = outputInfo.get_names();
const auto outputShape = outputInfo.get_shape();
std::cout << "Output Tensor: " << *outputNames.begin() << " Shape: " << outputShape << std::endl;
}
cv::Mat srcMat = cv::imread(img_path);
auto original_height = srcMat.rows;
auto original_width = srcMat.cols;
cv::Size resizeShape(256, 256);
cv::Mat matNormImage;
    preData(srcMat, resizeShape, matNormImage); // user helper: subtract mean, divide by std
ov::Tensor input_tensor = infer_request.get_input_tensor();
float* const input_data = input_tensor.data<float>();
for (int batch = 0; batch < inputShape[0]; batch++)
{
for (int h = 0; h < inputShape[2]; h++)
{
for (int w = 0; w < inputShape[3]; w++)
{
for (int c = 0; c < inputShape[1]; c++)
{
int out_index = batch * inputShape[1] * inputShape[2] * inputShape[3] + c * inputShape[2] * inputShape[3] + h * inputShape[3] + w;
input_data[out_index] = matNormImage.at<cv::Vec<float, 3>>(h, w)[c];
}
}
}
}
    // Step 6. Run inference (the infer request was already created above)
infer_request.infer();
//Step 7. Retrieve inference results
//const ov::Tensor& output_tensor = infer_request.get_tensor("argIdxMapImg");
const ov::Tensor& output_tensor = infer_request.get_output_tensor(1);
ov::Shape output_shape = output_tensor.get_shape();
std::cout << "Output Shape: " << output_shape << std::endl;
const float* result = output_tensor.data<const float>();
vector<cv::Mat> masks;
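    // The second output is assumed here to be a per-pixel class-index map of shape [N, H, W]
    // (hence the tensor name "argIdxMapImg" above); each value is stored as an 8-bit class id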
for (int batch = 0; batch < output_shape[0]; batch++)
{
cv::Mat mask = cv::Mat::zeros(output_shape[1], output_shape[2], CV_8UC1);
for (int h = 0; h < output_shape[1]; h++)
{
for (int w = 0; w < output_shape[2]; w++)
{
                int out_index = batch * output_shape[1] * output_shape[2] + h * output_shape[2] + w;
                float out_value = result[out_index];
                mask.at<uchar>(h, w) = static_cast<uchar>(out_value);
}
}
masks.push_back(mask);
cv::namedWindow("Image", cv::WINDOW_NORMAL);
cv::imshow("Image", mask*255);
cv::waitKey(0);
cv::destroyAllWindows();
}
}
C++ code and results (Inference Engine API)
#include <iostream>
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <sstream>
#include <opencv2/opencv.hpp>
#include "openvino/openvino.hpp"
#include <ie_core.hpp>
using namespace std;
void inference_seg(string& xml_path, string& bin_path, string& img_path) {
// Get OpenVINO runtime version
std::cout << ov::get_openvino_version().description << ':' << ov::get_openvino_version().buildNumber << std::endl;
// Step 1. Initialize OpenVINO Runtime Core
InferenceEngine::Core core;
// Step 2. Get list of available devices
std::vector<std::string> availableDevices = core.GetAvailableDevices();
// Step 3. Query and print supported metrics and config keys
if (availableDevices.empty()) {
std::cout << "No devices available." << std::endl;
}
else
{
std::cout << "Available devices: " << std::endl;
for (auto&& device : availableDevices) {
std::cout << device << std::endl;
}
}
    // Step 4. Read network structure and weights
InferenceEngine::CNNNetwork model = core.ReadNetwork(xml_path, bin_path);
    // Get input names and tensor info
InferenceEngine::InputsDataMap inputsInfo = model.getInputsInfo();
for (auto& inputInfo : inputsInfo) {
const std::string& inputName = inputInfo.first;
//std::cout << "Input Name: " << inputName << std::endl;
const InferenceEngine::DataPtr& inputData = inputInfo.second->getInputData();
const InferenceEngine::TensorDesc& inputDesc = inputData->getTensorDesc();
        // Get the tensor dimensions
const InferenceEngine::SizeVector& inputDims = inputDesc.getDims();
std::cout << "Input Tensor '" << inputName << "' Size: ";
for (size_t dim : inputDims) {
std::cout << dim << " ";
}
std::cout << std::endl;
}
    // Grab the first input (method one)
    auto item = inputsInfo.begin();
    const std::string& image_info_name = item->first; // name of the image input
    // Get output names and tensor info
InferenceEngine::OutputsDataMap outputsInfo = model.getOutputsInfo();
for (auto& outputInfo : outputsInfo) {
const std::string& outputName = outputInfo.first;
//std::cout << "Output Name: " << outputName << std::endl;
const InferenceEngine::DataPtr& outputData = outputInfo.second;
const InferenceEngine::TensorDesc& outputDesc = outputData->getTensorDesc();
        // Get the tensor dimensions
const InferenceEngine::SizeVector& outputDims = outputDesc.getDims();
std::cout << "Output Tensor '" << outputName << "' Size: ";
for (size_t dim : outputDims) {
std::cout << dim << " ";
}
std::cout << std::endl;
}
    // Step 5. Compile the model
InferenceEngine::ExecutableNetwork executableNetwork = core.LoadNetwork(model, "CPU");
    // Create the infer request
InferenceEngine::InferRequest infer_request = executableNetwork.CreateInferRequest();
cv::Mat srcMat = cv::imread(img_path);
cv::Size resizeShape(256, 256);
cv::Mat matNormImage;
    preDataDet(srcMat, resizeShape, matNormImage); // user helper: subtract mean, divide by std
    // Load the image into the model's input tensor
InferenceEngine::Blob::Ptr input_blob = infer_request.GetBlob(image_info_name);
const InferenceEngine::SizeVector input_dims = input_blob->getTensorDesc().getDims();
const size_t channels = input_dims[1];
const size_t height = input_dims[2];
const size_t width = input_dims[3];
    // Copy the image data into the input blob
InferenceEngine::LockedMemory<void> blobMapped = InferenceEngine::as<InferenceEngine::MemoryBlob>(input_blob)->wmap();
float* blob_data = blobMapped.as<float*>();
for (size_t h = 0; h < height; ++h) {
for (size_t w = 0; w < width; ++w) {
for (size_t c = 0; c < channels; ++c) {
blob_data[c * height * width + h * width + w] = matNormImage.at<float>(h, w * channels + c);
//blob_data[c * height * width + h * width + w] = matNormImage.at<cv::Vec<float, 3>>(h, w)[c];
}
}
}
infer_request.Infer();
    // Retrieve inference results
    const float* output_buffer = nullptr; // will point into the output blob, so no allocation is needed
for (auto& item : outputsInfo) {
auto output_name = item.first;
if (output_name == "argIdxMapImg")
{
auto output = infer_request.GetBlob(output_name);
auto const memLocker = output->cbuffer(); // use const memory locker
// output_buffer is valid as long as the lifetime of memLocker
output_buffer = memLocker.as<const float*>();
cv::Mat probImg = cv::Mat(cv::Size(width, height), CV_32FC1);
            // single image
memcpy(probImg.data, (const float*)output_buffer, (size_t)width * height * sizeof(float));
cv::imshow("Image", probImg * 255);
cv::waitKey(0);
}
}
}
Problem log:
The OpenVINO Python demo runs correctly, but the C++ side fails to detect the CPU device.
(Resolved)
This kind of problem is almost always a mismatch between the OpenVINO version configured in the project properties and the version of the dynamic libraries in the output folder (a low-level mistake, watch out for it).
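A quick way to catch this kind of mismatch early is to print, at startup, the version of the runtime the executable actually loaded (the same call the demos above already make) and compare it against the OpenVINO dynamic libraries sitting next to the executable. A minimal sketch:
#include <openvino/openvino.hpp>
#include <iostream>

int main() {
    // Prints the description and build number of the OpenVINO runtime that was actually
    // loaded at run time; if this differs from the SDK version set in the project
    // properties, the dynamic libraries in the output folder come from another install.
    std::cout << ov::get_openvino_version().description << ':'
              << ov::get_openvino_version().buildNumber << std::endl;
    return 0;
}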
Follow-up:
I will keep recording OpenVINO usage and issues across different task scenarios.