OpenVINO Inference Notes (Continuously Updated)

A running log of problems encountered while running inference with OpenVINO. Corrections and suggestions from readers are very welcome!

OpenVINO Detection Inference (YOLOv5)

The Python code and results are given below.

import glob, os
import onnxruntime as ort
import openvino.runtime as ov

import cv2
import numpy as np
import torch, torchvision
import time
from common.utils import cv_imread, cv_imwrite  # project-local image I/O helpers


def clip_coords(boxes, shape):
    # Clip bounding xyxy bounding boxes to image shape (height, width)
    if isinstance(boxes, torch.Tensor):  # faster individually
        boxes[:, 0].clamp_(0, shape[1])  # x1
        boxes[:, 1].clamp_(0, shape[0])  # y1
        boxes[:, 2].clamp_(0, shape[1])  # x2
        boxes[:, 3].clamp_(0, shape[0])  # y2
    else:  # np.array (faster grouped)
        boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1])  # x1, x2
        boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0])  # y1, y2


def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
    # Rescale coords (xyxy) from img1_shape to img0_shape
    if ratio_pad is None:  # calculate from img0_shape
        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new
        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
    else:
        gain = ratio_pad[0][0]
        pad = ratio_pad[1]

    coords[:, [0, 2]] -= pad[0]  # x padding
    coords[:, [1, 3]] -= pad[1]  # y padding
    coords[:, :4] /= gain
    clip_coords(coords, img0_shape)
    return coords


def box_iou(box1, box2):
    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
    """
    Return intersection-over-union (Jaccard index) of boxes.
    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
    Arguments:
        box1 (Tensor[N, 4])
        box2 (Tensor[M, 4])
    Returns:
        iou (Tensor[N, M]): the NxM matrix containing the pairwise
            IoU values for every element in boxes1 and boxes2
    """

    def box_area(box):
        # box = 4xn
        return (box[2] - box[0]) * (box[3] - box[1])

    area1 = box_area(box1.T)
    area2 = box_area(box2.T)

    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
    inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
    return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)


def xywh2xyxy(x):
    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
    y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
    return y


def non_max_suppression(prediction, conf_thres=0.05, iou_thres=0.01, classes=None, agnostic=False, multi_label=False,
                        labels=(), max_det=300):
    """Runs Non-Maximum Suppression (NMS) on inference results

    Returns:
         list of detections, on (n,6) tensor per image [xyxy, conf, cls]
    """

    nc = prediction.shape[2] - 5  # number of classes
    xc = prediction[..., 4] > conf_thres  # candidates

    # Checks
    assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
    assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'

    # Settings
    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
    max_nms = 30000  # maximum number of boxes into torchvision.ops.nms()
    time_limit = 10.0  # seconds to quit after
    redundant = True  # require redundant detections
    multi_label &= nc > 1  # multiple labels per box (adds 0.5ms/img)
    merge = False  # use merge-NMS

    t = time.time()
    output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
    for xi, x in enumerate(prediction):  # image index, image inference

        # Apply constraints
        x = x[xc[xi]]  # confidence
        # Cat apriori labels if autolabelling
        if labels and len(labels[xi]):
            l = labels[xi]
            v = torch.zeros((len(l), nc + 5), device=x.device)
            v[:, :4] = l[:, 1:5]  # box
            v[:, 4] = 1.0  # conf
            v[range(len(l)), l[:, 0].long() + 5] = 1.0  # cls
            x = torch.cat((x, v), 0)
        # If none remain process next image
        if not x.shape[0]:
            continue

        # Compute conf
        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf
        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
        box = xywh2xyxy(x[:, :4])

        # Detections matrix nx6 (xyxy, conf, cls)
        if multi_label:
            i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
        else:  # best class only
            conf, j = x[:, 5:].max(1, keepdim=True)
            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]

        # Filter by class
        if classes is not None:
            x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]

        # Check shape
        n = x.shape[0]  # number of boxes
        if not n:  # no boxes
            continue
        elif n > max_nms:  # excess boxes
            x = x[x[:, 4].argsort(descending=True)[:max_nms]]  # sort by confidence

        # Batched NMS
        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
        i = torchvision.ops.nms(boxes, scores, iou_thres)  # NMS
        if i.shape[0] > max_det:  # limit detections
            i = i[:max_det]
        if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
            # update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
            iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
            weights = iou * scores[None]  # box weights
            x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
            if redundant:
                i = i[iou.sum(1) > 1]  # require redundancy

        output[xi] = x[i]
        if (time.time() - t) > time_limit:
            print(f'WARNING: NMS time limit {time_limit}s exceeded')
            break  # time limit exceeded

    return output


def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
    # Resize and pad image while meeting stride-multiple constraints
    shape = im.shape[:2]  # current shape [height, width]
    if isinstance(new_shape, int):
        new_shape = (new_shape, new_shape)

    # Scale ratio (new / old)
    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
    if not scaleup:  # only scale down, do not scale up (for better val mAP)
        r = min(r, 1.0)

    # Compute padding
    ratio = r, r  # width, height ratios
    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
    if auto:  # minimum rectangle
        dw, dh = np.mod(dw, stride), np.mod(dh, stride)  # wh padding
    elif scaleFill:  # stretch
        dw, dh = 0.0, 0.0
        new_unpad = (new_shape[1], new_shape[0])
        ratio = new_shape[1] / shape[1], new_shape[0] / shape[0]  # width, height ratios

    dw /= 2  # divide padding into 2 sides
    dh /= 2

    if shape[::-1] != new_unpad:  # resize
        im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
    im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
    return im, ratio, (dw, dh)
    
class YoloV5InferOpenvino:
    def __init__(self, xml_path, device_id=0, input_size=(640, 640), score=0.45, iou_threa=0.45,
                 pad_color=(114, 114, 114)):
        super(YoloV5InferOpenvino, self).__init__()

        self.input_size = input_size
        self.pad_color = pad_color
        self.score = score
        self.iou_threa = iou_threa

        # 1. Create the OpenVINO runtime Core and list the available devices
        core = ov.Core()
        devices = core.available_devices
        for device in devices:
            device_name = core.get_property(device, "FULL_DEVICE_NAME")
            print(f"{device}: {device_name}")

        model_path = xml_path
        model = core.read_model(model_path)
        self.compiled_model = core.compile_model(model, "CPU")

        self.infer_request = self.compiled_model.create_infer_request()
        print(self.infer_request)
        # 2. The input tensor can be fetched by index ...
        input_tensor1 = self.infer_request.get_input_tensor(0)
        # assert input_tensor1.data.dtype == np.int64
        # ... or by name; this network has a single input node, named "images",
        # so both calls refer to the same underlying tensor
        input_tensor2 = self.infer_request.get_tensor("images")

    def _preprocess(self, bgr_img):
        img_size = self.input_size
        color = self.pad_color
        img_copy, ratio, (dw, dh) = letterbox(bgr_img, img_size, auto=False)
        img = np.array(img_copy, np.float32)
        img = img / 255.
        # Convert
        img = img.transpose((2, 0, 1))[::-1]  # HWC to CHW, BGR to RGB
        img = np.ascontiguousarray(img)
        mul_batch_inputs = [img]  # ,input2
        x = np.array(mul_batch_inputs)
        return x

    def infer(self, img0):
        # img0 = cv_imread(img_path)
        x_input = self._preprocess(img0)
        input_name = self.compiled_model.inputs[0].names
        out_name = self.compiled_model.outputs[0].names
        print("input_name = ", input_name)
        pred_onnx_outs = self.infer_request.infer(inputs={"images": x_input})
        output_tensor = self.infer_request.get_output_tensor(0).data
        print(output_tensor.shape)
        pred_onnx_out = output_tensor
        pred_onnx_out = torch.tensor(pred_onnx_out)
        # print(pred_onnx_out)
        pred = non_max_suppression(pred_onnx_out, conf_thres=self.score, iou_thres=self.iou_threa, labels=[],
                                   multi_label=True,
                                   agnostic=False)

        for i, det in enumerate(pred):  # per image
            if len(det):
                det[:, :4] = scale_coords(x_input.shape[2:], det[:, :4], img0.shape).round()
                det_numpy = det.numpy()
                return det_numpy
            else:
                return []


if __name__ == "__main__":
    CLASSES = ["a", "b", "c"]
    color = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0)]

    weight_path = "./model/yolov5.xml"
    model = YoloV5InferOpenvino(weight_path, input_size=[640, 640], score=0.1)
    img_path = "./img/test.jpg"
    
    img0 = cv_imread(img_path)

    det_numpy = model.infer(img0)
    if len(det_numpy) > 0:
        for i in range(det_numpy.shape[0]):
            bbox = det_numpy[i]
            xmin = int(bbox[0])
            ymin = int(bbox[1])
            xmax = int(bbox[2])
            ymax = int(bbox[3])
            score = float(bbox[4])
            idx = int(bbox[5])

            img0 = cv2.rectangle(img0, (xmin, ymin), (xmax, ymax), color[idx], 2)
            img0 = cv2.putText(img0, CLASSES[idx] + " " + str(score)[:6], (xmin, ymin), 1, 2, color[idx],
                               2)
        cv2.imshow("name", img0)
        cv2.waitKey(0)
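
The script imports onnxruntime but never uses it; it comes in handy for sanity-checking the converted IR. The sketch below is a minimal cross-check, assuming the original yolov5.onnx that the IR was exported from is still available (the path is a placeholder) and that it uses the same "images" input name: run the same preprocessed input through ONNX Runtime and compare the raw outputs.

    # Optional sanity check: compare the OpenVINO output against ONNX Runtime
    onnx_path = "./model/yolov5.onnx"  # hypothetical path to the source ONNX model
    sess = ort.InferenceSession(onnx_path, providers=["CPUExecutionProvider"])

    x_input = model._preprocess(img0)
    onnx_out = sess.run(None, {"images": x_input})[0]       # raw ONNX Runtime output
    ov_out = model.infer_request.get_output_tensor(0).data  # output of the last OpenVINO infer
    print("max abs diff:", np.abs(onnx_out - ov_out).max())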


C++ Code and Results (OpenVINO Runtime API 2.0)

#pragma once
#include <opencv2/dnn.hpp>
#include <openvino/openvino.hpp>
#include <opencv2/opencv.hpp>

using namespace std;


std::vector<cv::Scalar> colors_map = { cv::Scalar(0, 0, 255) , cv::Scalar(0, 255, 0) , cv::Scalar(255, 0, 0) ,
                                   cv::Scalar(255, 255, 0) , cv::Scalar(0, 255, 255) , cv::Scalar(255, 0, 255) };

const std::vector<std::string> class_names = { "a", "b", "c" };

cv::Mat letterbox(cv::Mat& img, std::vector<float>& paddings, std::vector<int> new_shape = { 640, 640 })
{
    int img_h = img.rows;
    int img_w = img.cols;

    // Compute scale ratio(new / old) and target resized shape
    float scale = std::min(new_shape[1] * 1.0 / img_h, new_shape[0] * 1.0 / img_w);
    int resize_h = int(round(img_h * scale));
    int resize_w = int(round(img_w * scale));
    paddings[0] = scale;

    // Compute padding
    int pad_h = new_shape[1] - resize_h;
    int pad_w = new_shape[0] - resize_w;

    // Resize and pad image while meeting stride-multiple constraints
    cv::Mat resized_img;
    cv::resize(img, resized_img, cv::Size(resize_w, resize_h));

    // divide padding into 2 sides
    float half_h = pad_h * 1.0 / 2;
    float half_w = pad_w * 1.0 / 2;
    paddings[1] = half_h;
    paddings[2] = half_w;

    // Compute padding border
    int top = int(round(half_h - 0.1));
    int bottom = int(round(half_h + 0.1));
    int left = int(round(half_w - 0.1));
    int right = int(round(half_w + 0.1));

    // Add border
    cv::copyMakeBorder(resized_img, resized_img, top, bottom, left, right, cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));

    return resized_img;
}


void OV_Det(string& xml_path, string& img_path) {

    // Get OpenVINO runtime version 
    std::cout << ov::get_openvino_version().description << ':' << ov::get_openvino_version().buildNumber << std::endl;

    // Step 1. Initialize OpenVINO Runtime Core
    ov::Core core;

    // Step 2. Get list of available devices
    std::vector<std::string> availableDevices = core.get_available_devices();

    // Step 3. Query and print supported metrics and config keys 
    if (availableDevices.empty()) {
        std::cout << "No devices available." << std::endl;
    }
    else
    {
        std::cout << "Available devices: " << std::endl;
        for (auto&& device : availableDevices) {
            std::cout << device << std::endl;
        }
    }

    // Step 4. Read the network structure and weights
    // std::shared_ptr<ov::Model> model = core.read_model(xml_path);
    // ov::CompiledModel compiled_model = core.compile_model(model, "CPU");
    
    auto compiled_model = core.compile_model(xml_path, "CPU");

    const auto inputShape = compiled_model.input().get_shape();
    std::unordered_set<std::string> inputNames = compiled_model.input().get_names();
    std::cout << "Input Tensor: " << *inputNames.begin() << "  Shape: " << inputShape << std::endl;


    std::vector<ov::Output<const ov::Node>> outputInfos = compiled_model.outputs();
    for (auto& outputInfo : outputInfos)
    {
        std::unordered_set<std::string> outputNames = outputInfo.get_names();
        const auto outputShape = outputInfo.get_shape();
        std::cout << "Output Tensor: " << *outputNames.begin() << "  Shape: " << outputShape << std::endl;
    }

    cv::Mat srcMat = cv::imread(img_path);
    std::vector<float> paddings(3);       //scale, half_h, half_w
    std::vector<int> reshape = { 640, 640 };
    cv::Mat resized_img = letterbox(srcMat, paddings, reshape);
    // BGR->RGB, u8(0-255)->f32(0.0-1.0), HWC->NCHW
    cv::Mat input_data = cv::dnn::blobFromImage(resized_img, 1 / 255.0, cv::Size(640, 640), cv::Scalar(0, 0, 0), true);

    // Step 5. Create tensor from image
    ov::Tensor input_tensor = ov::Tensor(compiled_model.input().get_element_type(), compiled_model.input().get_shape(), input_data.ptr(0));


    // Step 6. Create an infer request for model inference 
    ov::InferRequest infer_request = compiled_model.create_infer_request();
    infer_request.set_input_tensor(input_tensor);
    infer_request.infer();

    //Step 7. Retrieve inference results 
    const ov::Tensor& output_tensor = infer_request.get_tensor("output");
    //const ov::Tensor& output_tensor = infer_request.get_output_tensor(1);
    ov::Shape output_shape = output_tensor.get_shape();
    std::cout << "Output Shape: " << output_shape << std::endl;


    // Detect Matrix: 25200 x 8
    cv::Mat detect_buffer(output_shape[1], output_shape[2], CV_32F, output_tensor.data());

    float conf_threshold = 0.25;
    float nms_threshold = 0.5;
    std::vector<cv::Rect> boxes;
    std::vector<int> class_ids;
    std::vector<float> class_scores;
    std::vector<float> confidences;

    std::vector<cv::Mat> masks;

    // Each row: cx, cy, w, h, confidence, c1, c2, c3 (3 classes here)
    float scale = paddings[0];

    for (int i = 0; i < detect_buffer.rows; i++) {
        float confidence = detect_buffer.at<float>(i, 4);
        if (confidence < conf_threshold) {
            continue;
        }
        cv::Mat classes_scores = detect_buffer.row(i).colRange(5, 8);
        cv::Point class_id;
        double score;
        cv::minMaxLoc(classes_scores, NULL, &score, NULL, &class_id);

        // class score: 0~1
        if (score > 0.25)
        {
            float cx = detect_buffer.at<float>(i, 0);
            float cy = detect_buffer.at<float>(i, 1);
            float w = detect_buffer.at<float>(i, 2);
            float h = detect_buffer.at<float>(i, 3);
            int left = static_cast<int>((cx - 0.5 * w - paddings[2]) / scale);
            int top = static_cast<int>((cy - 0.5 * h - paddings[1]) / scale);
            int width = static_cast<int>(w / scale);
            int height = static_cast<int>(h / scale);
            cv::Rect box;
            box.x = left;
            box.y = top;
            box.width = width;
            box.height = height;

            boxes.push_back(box);
            class_ids.push_back(class_id.x);
            class_scores.push_back(score);
            confidences.push_back(confidence);
        }
    }

    // NMS
    std::vector<int> indices;
    cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, nms_threshold, indices);

    cv::Mat rgb_mask = cv::Mat::zeros(srcMat.size(), srcMat.type());

    // -------- Step 8. Visualize the detection results -----------
    for (size_t i = 0; i < indices.size(); i++) {
        int index = indices[i];
        int class_id = class_ids[index];

        cv::rectangle(srcMat, boxes[index], colors_map[class_id % 6], 2, 8);
        std::string label = class_names[class_id] + ":" + std::to_string(class_scores[index]);
        cv::putText(srcMat, label, cv::Point(boxes[index].tl().x, boxes[index].tl().y - 10), cv::FONT_HERSHEY_SIMPLEX, .5, colors_map[class_id % 6]);
    }

    cv::namedWindow("YOLOv5 OpenVINO Inference C++ Demo");
    cv::imshow("YOLOv5 OpenVINO Inference C++ Demo", srcMat);
    cv::waitKey(0);
    cv::destroyAllWindows();

}
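
One subtle difference from the Python version above: non_max_suppression() makes NMS class-aware by offsetting every box by class_id * max_wh before calling torchvision.ops.nms, whereas this C++ code feeds the raw boxes to cv::dnn::NMSBoxes, which suppresses across classes. A minimal Python sketch of the offset trick, for illustration only:

import numpy as np

# Class-aware NMS via coordinate offset: boxes of different classes are
# shifted into disjoint coordinate ranges, so cross-class pairs can never
# overlap and are therefore never suppressed against each other
max_wh = 4096  # must exceed any possible box coordinate
boxes = np.array([[10., 10., 50., 50.],
                  [12., 12., 52., 52.]], dtype=np.float32)
class_ids = np.array([0., 1.], dtype=np.float32)
offset_boxes = boxes + class_ids[:, None] * max_wh
print(offset_boxes)  # the class-1 box lands far away from the class-0 box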

C++ Code and Results (Inference Engine API)

void inference_Det(string& xml_path, string& bin_path, string& img_path) {
   
    // Get OpenVINO runtime version 
    std::cout << ov::get_openvino_version().description << ':' << ov::get_openvino_version().buildNumber << std::endl;
    
    // Step 1. Initialize OpenVINO Runtime Core
    InferenceEngine::Core core;

    // Step 2. Get list of available devices
    std::vector<std::string> availableDevices = core.GetAvailableDevices();

    // Step 3. Query and print supported metrics and config keys 
    if (availableDevices.empty()) {
        std::cout << "No devices available." << std::endl;
    }
    else
    {
        std::cout << "Available devices: " << std::endl;
        for (auto&& device : availableDevices) {
            std::cout << device << std::endl;
        }
    }

    // Step 4. Read the network structure and weights
    InferenceEngine::CNNNetwork model = core.ReadNetwork(xml_path, bin_path);

    // Get input names and tensor info
    InferenceEngine::InputsDataMap inputsInfo = model.getInputsInfo();
    for (auto& inputInfo : inputsInfo) {
        const std::string& inputName = inputInfo.first;
        //std::cout << "Input Name: " << inputName << std::endl;

        const InferenceEngine::DataPtr& inputData = inputInfo.second->getInputData();
        const InferenceEngine::TensorDesc& inputDesc = inputData->getTensorDesc();

        // Get the tensor dimensions
        const InferenceEngine::SizeVector& inputDims = inputDesc.getDims();

        std::cout << "Input Tensor '" << inputName << "' Size: ";
        for (size_t dim : inputDims) {
            std::cout << dim << " ";
        }
        std::cout << std::endl;
    }

    // Get the first input and its name
    auto item = inputsInfo.begin();
    const std::string& image_info_name = item->first; // name of the model input

    // Get output names and tensor info
    InferenceEngine::OutputsDataMap outputsInfo = model.getOutputsInfo();
    for (auto& outputInfo : outputsInfo) {
        const std::string& outputName = outputInfo.first;
        //std::cout << "Output Name: " << outputName << std::endl;

        const InferenceEngine::DataPtr& outputData = outputInfo.second;
        const InferenceEngine::TensorDesc& outputDesc = outputData->getTensorDesc();

        // Get the tensor dimensions
        const InferenceEngine::SizeVector& outputDims = outputDesc.getDims();

        std::cout << "Output Tensor '" << outputName << "' Size: ";
        for (size_t dim : outputDims) {
            std::cout << dim << " ";
        }
        std::cout << std::endl;
    }

    // Step 5. Compile the model
    InferenceEngine::ExecutableNetwork executableNetwork = core.LoadNetwork(model, "CPU");

    // Create the inference request
    InferenceEngine::InferRequest infer_request = executableNetwork.CreateInferRequest();


    cv::Mat srcMat = cv::imread(img_path);

    // Record the original image height and width
    auto original_height = srcMat.rows;
    auto original_width = srcMat.cols;

    cv::Size resizeShape(640, 640);
    cv::Mat matNormImage;
    preDataDet(srcMat, resizeShape, matNormImage); // subtract mean / divide by std (custom helper)

    // Load the image into the model's input tensor
    InferenceEngine::Blob::Ptr input_blob = infer_request.GetBlob(image_info_name);
    const InferenceEngine::SizeVector input_dims = input_blob->getTensorDesc().getDims();
    const size_t channels = input_dims[1];
    const size_t height = input_dims[2];
    const size_t width = input_dims[3];

    // Copy the image data into the input blob (HWC -> CHW)
    InferenceEngine::LockedMemory<void> blobMapped = InferenceEngine::as<InferenceEngine::MemoryBlob>(input_blob)->wmap();
    float* blob_data = blobMapped.as<float*>();

    for (size_t h = 0; h < height; ++h) {
        for (size_t w = 0; w < width; ++w) {
            for (size_t c = 0; c < channels; ++c) {
                blob_data[c * height * width + h * width + w] = matNormImage.at<float>(h, w * channels + c);
                //blob_data[c * height * width + h * width + w] = matNormImage.at<cv::Vec<float, 3>>(h, w)[c];
            }

        }
    }


    infer_request.Infer();

    int img_w = 8;       // columns per detection row: cx, cy, w, h, conf, c1, c2, c3
    int img_h = 25200;   // number of candidate boxes

    // Retrieve the inference result
    const float* output_buffer = nullptr;  // points into the locked blob memory; no allocation needed
    for (auto& item : outputsInfo) {
        auto output_name = item.first;
        if (output_name == "output")
        {
            auto output = infer_request.GetBlob(output_name);
            auto const memLocker = output->cbuffer(); // use const memory locker
            // output_buffer is valid as long as the lifetime of memLocker
            output_buffer = memLocker.as<const float*>();

            cv::Mat probImg = cv::Mat(cv::Size(img_w, img_h), CV_32FC1);
            // single image
            memcpy(probImg.data, (const float*)output_buffer, (size_t)img_w * img_h * sizeof(float));
            //std::cout << probImg.rows << std::endl;

            std::vector<int> candi;
            for (size_t y = 0; y < probImg.rows; ++y)
            {
                float tmpVal = probImg.at<float>(y, 4);

                //std::cout << y << " " << tmpVal << std::endl;

                if (tmpVal > 0.7)
                {
                    std::cout << y << " " << tmpVal << std::endl;
                    candi.push_back(y);
                }
            }

            //Detections matrix nx6 (xyxy, conf, cls)
            std::vector<TDetectBbox> all_box;
            for (size_t g = 0; g < candi.size(); ++g)
            {
                float* ptr = probImg.ptr<float>(candi[g]);  // pointer to the candidate row

                TDetectBbox candi_single;
                std::vector<float> score;

                for (size_t m = 0; m < 3; ++m) // 3 classes: collect class scores
                {
                    float _temVal = ptr[m + 5];
                    score.push_back(_temVal);
                }

                // Return the max score and its index (custom helper)
                float _score{ 0.0 };
                int classIndx = findVectorMax(score, _score);

                candi_single.x0 = ptr[0] - ptr[2] / 2; // convert xywh (center) to xyxy (corners)
                candi_single.y0 = ptr[1] - ptr[3] / 2;
                candi_single.x1 = ptr[0] + ptr[2] / 2;
                candi_single.y1 = ptr[1] + ptr[3] / 2;
                candi_single.confidence = ptr[4] * _score; // obj_conf * cls_conf
                candi_single.classIndx = classIndx;
                all_box.push_back(candi_single);
            }

            // Run NMS (custom helper)
            nms(all_box, 0.4);

            // Compute the coordinate mapping back to the original image
            int shift_x{ 0 }, shift_y{ 0 };
            float scale_x{ .0f }, scale_y{ .0f };

            // This path uses a plain resize (no letterbox padding)
            scale_x = (float)width / original_width;
            scale_y = (float)height / original_height;

            for (size_t i = 0; i < all_box.size(); i++)
            {
                if (all_box[i].confidence > 0.7)
                {
                    int x1 = Max(0, (int)((all_box[i].x0 - shift_x) / scale_x));
                    int y1 = Max(0, (int)((all_box[i].y0 - shift_y) / scale_y));
                    int x2 = Min(original_width, (int)((all_box[i].x1 - shift_x) / scale_x));
                    int y2 = Min(original_height, (int)((all_box[i].y1 - shift_y) / scale_y));

                    cv::rectangle(srcMat, cv::Rect(x1, y1, x2 - x1, y2 - y1), cv::Scalar(255, 255, 255));

                    std::cout << "confidence: " << all_box[i].confidence << std::endl;
                }
            }
            cv::imshow("Image", srcMat);
            cv::waitKey(0);

        }
    }
}


OpenVINO Semantic Segmentation Inference (ENet)

Python Code and Results

import openvino.runtime as ov
import cv2
import numpy as np
import matplotlib.pyplot as plt

if __name__ == '__main__':

    # 1. Create the OpenVINO runtime Core
    core = ov.Core()

    devices = core.available_devices
    for device in devices:
        device_name = core.get_property(device, "FULL_DEVICE_NAME")
        print(f"{device}: {device_name}")

    # 1b. Custom op extensions can be loaded here; OpenVINO's extension API is
    # strongly recommended for writing custom ops (usually not needed)
    # core.add_extension("path_to_extension_library.so")

    # 2. Read the model from disk
    model_path = "./model/ENet.xml"
    model = core.read_model(model_path)

    # 3. Compile the model for the target device
    # read_model() only parses the model file; it still has to be compiled
    compiled_model = core.compile_model(model, "CPU")
    print(compiled_model)
    for i in range(len(compiled_model.inputs)):
        print(compiled_model.inputs[i].names, compiled_model.inputs[i].shape)

    for i in range(len(compiled_model.outputs)):
        print(compiled_model.outputs[i].names, compiled_model.outputs[i].shape)
    print("-------------------------------------")

    # 4. Create an inference request
    infer_request = compiled_model.create_infer_request()

    # 5. Fill in the input data
    # 5.1 Fetch the tensor by input index
    input_tensor1 = infer_request.get_input_tensor(0)
    # assert input_tensor1.data.dtype == np.int64
    # 5.2 The tensor can also be fetched by name; this network has a single
    # input named "images". Note that the two wrapper objects compare unequal
    # even though they refer to the same underlying tensor
    input_tensor2 = infer_request.get_tensor("images")
    assert input_tensor2 != input_tensor1
    print("-------------------------------------")

    # 6. Read the image and preprocess: resize, scale to [0, 1], HWC -> CHW
    image_path = "./image/test.jpg"
    images = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_COLOR)
    images = cv2.resize(images, (256, 256), interpolation=cv2.INTER_LINEAR)
    images = images / 255.
    images = images.transpose((2, 0, 1))
    images = np.array(images, np.float32)
    x = []
    x.append(images)
    x = np.array(x)
    print("-------------------------------------")

    # 6.1 Run inference; infer() returns results keyed by output
    res = infer_request.infer(inputs={"images": x})
    for key in res.keys():
        print(res[key].shape)

    # 6.2 Alternative: fetch the result via get_output_tensor()
    output_tensor = infer_request.get_output_tensor(1)
    print(output_tensor.data.shape)

    plt.imshow(output_tensor.data[0], "gray")
    plt.colorbar(label='max value test')
    plt.show()
    print("hello world")

C++ Code and Results (OpenVINO Runtime API 2.0)

#pragma once
#include <opencv2/dnn.hpp>
#include <openvino/openvino.hpp>
#include <opencv2/opencv.hpp>

using namespace std;

void OV_Seg(string& xml_path, string& img_path) {

    // Get OpenVINO runtime version 
    std::cout << ov::get_openvino_version().description << ':' << ov::get_openvino_version().buildNumber << std::endl;

    // Step 1. Initialize OpenVINO Runtime Core
    ov::Core core;

    // Step 2. Get list of available devices
    std::vector<std::string> availableDevices = core.get_available_devices();

    // Step 3. Query and print supported metrics and config keys 
    if (availableDevices.empty()) {
        std::cout << "No devices available." << std::endl;
    }
    else
    {
        std::cout << "Available devices: " << std::endl;
        for (auto&& device : availableDevices) {
            std::cout << device << std::endl;
        }
    }

    // Step 4. Read the network structure and weights
    // std::shared_ptr<ov::Model> model = core.read_model(xml_path);
    // ov::CompiledModel compiled_model = core.compile_model(model, "CPU");

    auto compiled_model = core.compile_model(xml_path, "CPU");
    ov::InferRequest infer_request = compiled_model.create_infer_request();

    const auto inputShape = compiled_model.input().get_shape();
    std::unordered_set<std::string> inputNames = compiled_model.input().get_names();
    std::cout << "Input Tensor: " << *inputNames.begin() << "  Shape: " << inputShape << std::endl;


    std::vector<ov::Output<const ov::Node>> outputInfos = compiled_model.outputs();
    for (auto& outputInfo : outputInfos)
    {
        std::unordered_set<std::string> outputNames = outputInfo.get_names();
        const auto outputShape = outputInfo.get_shape();
        std::cout << "Output Tensor: " << *outputNames.begin() << "  Shape: " << outputShape << std::endl;
    }

    cv::Mat srcMat = cv::imread(img_path);

    auto original_height = srcMat.rows;
    auto original_width = srcMat.cols;
    
    cv::Size resizeShape(256, 256);
    cv::Mat matNormImage;
    preData(srcMat, resizeShape, matNormImage); // subtract mean / divide by std (custom helper)

    ov::Tensor input_tensor = infer_request.get_input_tensor();
    float* const input_data = input_tensor.data<float>();
    for (int batch = 0; batch < inputShape[0]; batch++)
    {
        for (int h = 0; h < inputShape[2]; h++)
        {
            for (int w = 0; w < inputShape[3]; w++) 
            {
                for (int c = 0; c < inputShape[1]; c++)
                {
                    int out_index = batch * inputShape[1] * inputShape[2] * inputShape[3] + c * inputShape[2] * inputShape[3] + h * inputShape[3] + w;
                    input_data[out_index] = matNormImage.at<cv::Vec<float, 3>>(h, w)[c];
                }

            }
        }

    }

    // Step 6. Run inference (the request was created right after compilation)
    infer_request.infer();

    //Step 7. Retrieve inference results 
    //const ov::Tensor& output_tensor = infer_request.get_tensor("argIdxMapImg");
    const ov::Tensor& output_tensor = infer_request.get_output_tensor(1);
    ov::Shape output_shape = output_tensor.get_shape();
    std::cout << "Output Shape: " << output_shape << std::endl;

    const float* result = output_tensor.data<const float>();

    vector<cv::Mat> masks;

    for (int batch = 0; batch < output_shape[0]; batch++)
    {
        cv::Mat mask = cv::Mat::zeros(output_shape[1], output_shape[2], CV_8UC1);
        for (int h = 0; h < output_shape[1]; h++)
        {
            for (int w = 0; w < output_shape[2]; w++)
            {
                int out_index = batch * output_shape[1] * output_shape[2] + h * output_shape[2] + w;
                float out_value = result[out_index];
                mask.at<uchar>(h, w) = static_cast<uchar>(out_value);
            }
        }
        masks.push_back(mask);
        cv::namedWindow("Image", cv::WINDOW_NORMAL);
        cv::imshow("Image", mask*255);
        cv::waitKey(0);
        cv::destroyAllWindows(); 
    }
}

C++ Code and Results (Inference Engine API)

#include <iostream>
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <sstream>
#include <opencv2/opencv.hpp>
#include "openvino/openvino.hpp"
#include <ie_core.hpp>

using namespace std;

void inference_seg(string& xml_path, string& bin_path, string& img_path) {

    // Get OpenVINO runtime version 
    std::cout << ov::get_openvino_version().description << ':' << ov::get_openvino_version().buildNumber << std::endl;

    // Step 1. Initialize OpenVINO Runtime Core
    InferenceEngine::Core core;

    // Step 2. Get list of available devices
    std::vector<std::string> availableDevices = core.GetAvailableDevices();

    // Step 3. Query and print supported metrics and config keys 
    if (availableDevices.empty()) {
        std::cout << "No devices available." << std::endl;
    }
    else
    {
        std::cout << "Available devices: " << std::endl;
        for (auto&& device : availableDevices) {
            std::cout << device << std::endl;
        }
    }

    // Step 4. Read the network structure and weights
    InferenceEngine::CNNNetwork model = core.ReadNetwork(xml_path, bin_path);


    // Get input names and tensor info
    InferenceEngine::InputsDataMap inputsInfo = model.getInputsInfo();
    for (auto& inputInfo : inputsInfo) {
        const std::string& inputName = inputInfo.first;
        //std::cout << "Input Name: " << inputName << std::endl;

        const InferenceEngine::DataPtr& inputData = inputInfo.second->getInputData();
        const InferenceEngine::TensorDesc& inputDesc = inputData->getTensorDesc();

        // Get the tensor dimensions
        const InferenceEngine::SizeVector& inputDims = inputDesc.getDims();

        std::cout << "Input Tensor '" << inputName << "' Size: ";
        for (size_t dim : inputDims) {
            std::cout << dim << " ";
        }
        std::cout << std::endl;
    }


    // Get the first input and its name
    auto item = inputsInfo.begin();
    const std::string& image_info_name = item->first; // name of the model input

    // Get output names and tensor info
    InferenceEngine::OutputsDataMap outputsInfo = model.getOutputsInfo();
    for (auto& outputInfo : outputsInfo) {
        const std::string& outputName = outputInfo.first;
        //std::cout << "Output Name: " << outputName << std::endl;


        const InferenceEngine::DataPtr& outputData = outputInfo.second;
        const InferenceEngine::TensorDesc& outputDesc = outputData->getTensorDesc();

        // Get the tensor dimensions
        const InferenceEngine::SizeVector& outputDims = outputDesc.getDims();

        std::cout << "Output Tensor '" << outputName << "' Size: ";
        for (size_t dim : outputDims) {
            std::cout << dim << " ";
        }
        std::cout << std::endl;
    }

    // Step 5. Compile the model
    InferenceEngine::ExecutableNetwork executableNetwork = core.LoadNetwork(model, "CPU");

    // Create the inference request
    InferenceEngine::InferRequest infer_request = executableNetwork.CreateInferRequest();

    cv::Mat srcMat = cv::imread(img_path);
    cv::Size resizeShape(256, 256);
    cv::Mat matNormImage;
    preDataDet(srcMat, resizeShape, matNormImage); // subtract mean / divide by std (custom helper)

    // Load the image into the model's input tensor
    InferenceEngine::Blob::Ptr input_blob = infer_request.GetBlob(image_info_name);
    const InferenceEngine::SizeVector input_dims = input_blob->getTensorDesc().getDims();
    const size_t channels = input_dims[1];
    const size_t height = input_dims[2];
    const size_t width = input_dims[3];

    // Copy the image data into the input blob (HWC -> CHW)
    InferenceEngine::LockedMemory<void> blobMapped = InferenceEngine::as<InferenceEngine::MemoryBlob>(input_blob)->wmap();
    float* blob_data = blobMapped.as<float*>();
    
    for (size_t h = 0; h < height; ++h) {
        for (size_t w = 0; w < width; ++w) {
            for (size_t c = 0; c < channels; ++c) {
                blob_data[c * height * width + h * width + w] = matNormImage.at<float>(h, w * channels + c);
                //blob_data[c * height * width + h * width + w] = matNormImage.at<cv::Vec<float, 3>>(h, w)[c];
            }

        }
    }


    infer_request.Infer();

    // Retrieve the inference result
    const float* output_buffer = nullptr;  // points into the locked blob memory; no allocation needed
    for (auto& item : outputsInfo) {
        auto output_name = item.first;
        if (output_name == "argIdxMapImg")
        {
            auto output = infer_request.GetBlob(output_name);
            auto const memLocker = output->cbuffer(); // use const memory locker
            // output_buffer is valid as long as the lifetime of memLocker
            output_buffer = memLocker.as<const float*>();
            cv::Mat probImg = cv::Mat(cv::Size(width, height), CV_32FC1);
            // single image
            memcpy(probImg.data, (const float*)output_buffer, (size_t)width * height * sizeof(float));
            cv::imshow("Image", probImg * 255);
            cv::waitKey(0);
        }
    }
}


Problem Log:

The OpenVINO Python side runs inference correctly, but the C++ side fails to detect the CPU (resolved)
This kind of failure is almost always caused by a mismatch between the OpenVINO version set in the project's property configuration and the version of the runtime DLLs in the output folder (a low-level mistake, but an easy one to make, so watch out for it).
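A quick way to confirm which runtime actually gets loaded is to print the version on both sides and compare; the C++ demos above already print ov::get_openvino_version(). A minimal check on the Python side:

import openvino.runtime as ov

# Print the version string of the runtime that Python actually loaded; it
# should match the version reported by the C++ binary and the DLLs that sit
# next to the executable
print(ov.get_version())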


Follow-up:

I will keep updating this post with OpenVINO applications, and the problems that come up, across different task scenarios.
