OpenVINO Inference Notes:
A running log of problems hit while doing inference with OpenVINO. Pointers and corrections are very welcome. Many thanks!
OpenVINO detection inference (YOLOv5)
The Python code and results are given below.
import openvino.runtime as ov
import cv2
import numpy as np
import torch, torchvision
import time
from common.utils import cv_imread  # project helper: read an image from a (possibly non-ASCII) path
def clip_coords(boxes, shape):
    # Clip xyxy bounding boxes to image shape (height, width)
if isinstance(boxes, torch.Tensor): # faster individually
boxes[:, 0].clamp_(0, shape[1]) # x1
boxes[:, 1].clamp_(0, shape[0]) # y1
boxes[:, 2].clamp_(0, shape[1]) # x2
boxes[:, 3].clamp_(0, shape[0]) # y2
else: # np.array (faster grouped)
boxes[:, [0, 2]] = boxes[:, [0, 2]].clip(0, shape[1]) # x1, x2
boxes[:, [1, 3]] = boxes[:, [1, 3]].clip(0, shape[0]) # y1, y2
def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
# Rescale coords (xyxy) from img1_shape to img0_shape
if ratio_pad is None: # calculate from img0_shape
gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1]) # gain = old / new
pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2 # wh padding
else:
gain = ratio_pad[0][0]
pad = ratio_pad[1]
coords[:, [0, 2]] -= pad[0] # x padding
coords[:, [1, 3]] -= pad[1] # y padding
coords[:, :4] /= gain
clip_coords(coords, img0_shape)
return coords
def box_iou(box1, box2):
# https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
"""
Return intersection-over-union (Jaccard index) of boxes.
Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
Arguments:
box1 (Tensor[N, 4])
box2 (Tensor[M, 4])
Returns:
iou (Tensor[N, M]): the NxM matrix containing the pairwise
IoU values for every element in boxes1 and boxes2
"""
def box_area(box):
# box = 4xn
return (box[2] - box[0]) * (box[3] - box[1])
area1 = box_area(box1.T)
area2 = box_area(box2.T)
# inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
return inter / (area1[:, None] + area2 - inter) # iou = inter / (area1 + area2 - inter)
def xywh2xyxy(x):
# Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
y = x.clone() if isinstance(x, torch.Tensor) else np.copy(x)
y[:, 0] = x[:, 0] - x[:, 2] / 2 # top left x
y[:, 1] = x[:, 1] - x[:, 3] / 2 # top left y
y[:, 2] = x[:, 0] + x[:, 2] / 2 # bottom right x
y[:, 3] = x[:, 1] + x[:, 3] / 2 # bottom right y
return y
def non_max_suppression(prediction, conf_thres=0.05, iou_thres=0.01, classes=None, agnostic=False, multi_label=False,
labels=(), max_det=300):
"""Runs Non-Maximum Suppression (NMS) on inference results
Returns:
list of detections, on (n,6) tensor per image [xyxy, conf, cls]
"""
nc = prediction.shape[2] - 5 # number of classes
xc = prediction[..., 4] > conf_thres # candidates
# Checks
assert 0 <= conf_thres <= 1, f'Invalid Confidence threshold {conf_thres}, valid values are between 0.0 and 1.0'
assert 0 <= iou_thres <= 1, f'Invalid IoU {iou_thres}, valid values are between 0.0 and 1.0'
# Settings
min_wh, max_wh = 2, 4096 # (pixels) minimum and maximum box width and height
max_nms = 30000 # maximum number of boxes into torchvision.ops.nms()
time_limit = 10.0 # seconds to quit after
redundant = True # require redundant detections
multi_label &= nc > 1 # multiple labels per box (adds 0.5ms/img)
merge = False # use merge-NMS
t = time.time()
output = [torch.zeros((0, 6), device=prediction.device)] * prediction.shape[0]
for xi, x in enumerate(prediction): # image index, image inference
# Apply constraints
x = x[xc[xi]] # confidence
        # Concatenate a priori labels if autolabelling
if labels and len(labels[xi]):
l = labels[xi]
v = torch.zeros((len(l), nc + 5), device=x.device)
v[:, :4] = l[:, 1:5] # box
v[:, 4] = 1.0 # conf
v[range(len(l)), l[:, 0].long() + 5] = 1.0 # cls
x = torch.cat((x, v), 0)
# If none remain process next image
if not x.shape[0]:
continue
# Compute conf
x[:, 5:] *= x[:, 4:5] # conf = obj_conf * cls_conf
# Box (center x, center y, width, height) to (x1, y1, x2, y2)
box = xywh2xyxy(x[:, :4])
# Detections matrix nx6 (xyxy, conf, cls)
if multi_label:
i, j = (x[:, 5:] > conf_thres).nonzero(as_tuple=False).T
x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
else: # best class only
conf, j = x[:, 5:].max(1, keepdim=True)
x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
# Filter by class
if classes is not None:
x = x[(x[:, 5:6] == torch.tensor(classes, device=x.device)).any(1)]
# Check shape
n = x.shape[0] # number of boxes
if not n: # no boxes
continue
elif n > max_nms: # excess boxes
x = x[x[:, 4].argsort(descending=True)[:max_nms]] # sort by confidence
# Batched NMS
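        # Boxes are offset by class_index * max_wh so boxes of different classes never
        # overlap; a single class-agnostic NMS call then behaves like per-class NMS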
c = x[:, 5:6] * (0 if agnostic else max_wh) # classes
boxes, scores = x[:, :4] + c, x[:, 4] # boxes (offset by class), scores
i = torchvision.ops.nms(boxes, scores, iou_thres) # NMS
if i.shape[0] > max_det: # limit detections
i = i[:max_det]
if merge and (1 < n < 3E3): # Merge NMS (boxes merged using weighted mean)
# update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
iou = box_iou(boxes[i], boxes) > iou_thres # iou matrix
weights = iou * scores[None] # box weights
x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True) # merged boxes
if redundant:
i = i[iou.sum(1) > 1] # require redundancy
output[xi] = x[i]
if (time.time() - t) > time_limit:
print(f'WARNING: NMS time limit {time_limit}s exceeded')
break # time limit exceeded
return output
def letterbox(im, new_shape=(640, 640), color=(114, 114, 114), auto=True, scaleFill=False, scaleup=True, stride=32):
# Resize and pad image while meeting stride-multiple constraints
shape = im.shape[:2] # current shape [height, width]
if isinstance(new_shape, int):
new_shape = (new_shape, new_shape)
# Scale ratio (new / old)
r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
if not scaleup: # only scale down, do not scale up (for better val mAP)
r = min(r, 1.0)
# Compute padding
ratio = r, r # width, height ratios
new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r))
dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1] # wh padding
if auto: # minimum rectangle
dw, dh = np.mod(dw, stride), np.mod(dh, stride) # wh padding
elif scaleFill: # stretch
dw, dh = 0.0, 0.0
new_unpad = (new_shape[1], new_shape[0])
ratio = new_shape[1] / shape[1], new_shape[0] / shape[0] # width, height ratios
dw /= 2 # divide padding into 2 sides
dh /= 2
if shape[::-1] != new_unpad: # resize
im = cv2.resize(im, new_unpad, interpolation=cv2.INTER_LINEAR)
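    # The -0.1/+0.1 below biases round() so that top+bottom (left+right) always sums to the
    # full padding even when dh (dw) ends in .5, e.g. dh = 3.5 -> top 3, bottom 4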
top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
im = cv2.copyMakeBorder(im, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color) # add border
return im, ratio, (dw, dh)
class YoloV5InferOpenvino:
    def __init__(self, xml_path, device_id=0, input_size=(640, 640), score=0.45, iou_thres=0.45,
                 pad_color=(114, 114, 114)):
super(YoloV5InferOpenvino, self).__init__()
self.input_size = input_size
self.pad_color = pad_color
self.score = score
        self.iou_thres = iou_thres
        # 1. Create the OpenVINO Core
core = ov.Core()
devices = core.available_devices
for device in devices:
device_name = core.get_property(device, "FULL_DEVICE_NAME")
print(f"{device}: {device_name}")
model_path = xml_path
model = core.read_model(model_path)
self.compiled_model = core.compile_model(model, "CPU")
self.infer_request = self.compiled_model.create_infer_request()
print(self.infer_request)
input_tensor1 = self.infer_request.get_input_tensor(0)
# assert input_tensor1.data.dtype == np.int64
        # The input tensor can also be fetched by name; this network has a single input named "images",
        # so both methods refer to the same underlying tensor
        input_tensor2 = self.infer_request.get_tensor("images")
def _preprocess(self, bgr_img):
img_size = self.input_size
color = self.pad_color
img_copy, ratio, (dw, dh) = letterbox(bgr_img, img_size, auto=False)
img = np.array(img_copy, np.float32)
img = img / 255.
# Convert
img = img.transpose((2, 0, 1))[::-1] # HWC to CHW, BGR to RGB
img = np.ascontiguousarray(img)
        x = np.expand_dims(img, 0)  # add batch dimension
return x
def infer(self, img0):
# img0 = cv_imread(img_path)
x_input = self._preprocess(img0)
        input_names = self.compiled_model.inputs[0].names
        output_names = self.compiled_model.outputs[0].names
        print("input names =", input_names, "output names =", output_names)
        self.infer_request.infer(inputs={"images": x_input})
        output_tensor = self.infer_request.get_output_tensor(0).data
        print(output_tensor.shape)
        pred_out = torch.tensor(output_tensor)
        pred = non_max_suppression(pred_out, conf_thres=self.score, iou_thres=self.iou_thres, labels=[],
                                   multi_label=True, agnostic=False)
for i, det in enumerate(pred): # per image
if len(det):
det[:, :4] = scale_coords(x_input.shape[2:], det[:, :4], img0.shape).round()
det_numpy = det.numpy()
return det_numpy
else:
return []
if __name__ == "__main__":
CLASSES = ["a", "b", "c"]
color = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0), (255, 0, 0), (0, 255, 0), (0, 0, 255), (255, 255, 0)]
weight_path = "./model/yolov5.xml"
model = YoloV5InferOpenvino(weight_path, input_size=[640, 640], score=0.1)
img_path = "./img/test.jpg"
img0 = cv_imread(img_path)
det_numpy = model.infer(img0)
if len(det_numpy) > 0:
for i in range(det_numpy.shape[0]):
bbox = det_numpy[i]
xmin = int(bbox[0])
ymin = int(bbox[1])
xmax = int(bbox[2])
ymax = int(bbox[3])
score = float(bbox[4])
idx = int(bbox[5])
            img0 = cv2.rectangle(img0, (xmin, ymin), (xmax, ymax), color[idx], 2)  # draw box outline
img0 = cv2.putText(img0, CLASSES[idx] + " " + str(score)[:6], (xmin, ymin), 1, 2, color[idx],
2)
cv2.imshow("name", img0)
cv2.waitKey(0)
C++ code and results (OpenVINO Runtime API 2.0)
#pragma once
#include <opencv2/dnn.hpp>
#include <openvino/openvino.hpp>
#include <opencv2/opencv.hpp>
using namespace std;
std::vector<cv::Scalar> colors_map = { cv::Scalar(0, 0, 255) , cv::Scalar(0, 255, 0) , cv::Scalar(255, 0, 0) ,
cv::Scalar(255, 255, 0) , cv::Scalar(0, 255, 255) , cv::Scalar(255, 0, 255) };
const std::vector<std::string> class_names_Hawkeye = { "a", "b", "c" };
cv::Mat letterbox(cv::Mat& img, std::vector<float>& paddings, std::vector<int> new_shape = { 640, 640 })
{
int img_h = img.rows;
int img_w = img.cols;
// Compute scale ratio(new / old) and target resized shape
float scale = std::min(new_shape[1] * 1.0 / img_h, new_shape[0] * 1.0 / img_w);
int resize_h = int(round(img_h * scale));
int resize_w = int(round(img_w * scale));
paddings[0] = scale;
// Compute padding
int pad_h = new_shape[1] - resize_h;
int pad_w = new_shape[0] - resize_w;
// Resize and pad image while meeting stride-multiple constraints
cv::Mat resized_img;
cv::resize(img, resized_img, cv::Size(resize_w, resize_h));
// divide padding into 2 sides
float half_h = pad_h * 1.0 / 2;
float half_w = pad_w * 1.0 / 2;
paddings[1] = half_h;
paddings[2] = half_w;
// Compute padding boarder
int top = int(round(half_h - 0.1));
int bottom = int(round(half_h + 0.1));
int left = int(round(half_w - 0.1));
int right = int(round(half_w + 0.1));
// Add border
    cv::copyMakeBorder(resized_img, resized_img, top, bottom, left, right, cv::BORDER_CONSTANT, cv::Scalar(114, 114, 114));
return resized_img;
}
void OV_Det(string& xml_path, string& img_path) {
// Get OpenVINO runtime version
std::cout << ov::get_openvino_version().description << ':' << ov::get_openvino_version().buildNumber << std::endl;
// Step 1. Initialize OpenVINO Runtime Core
ov::Core core;
// Step 2. Get list of available devices
std::vector<std::string> availableDevices = core.get_available_devices();
// Step 3. Query and print supported metrics and config keys
if (availableDevices.empty()) {
std::cout << "No devices available." << std::endl;
}
else
{
std::cout << "Available devices: " << std::endl;
for (auto&& device : availableDevices) {
std::cout << device << std::endl;
}
}
    // Step 4. Read network structure and weights
// std::shared_ptr<ov::Model> model = core.read_model(xml_path);
// ov::CompiledModel compiled_model = core.compile_model(model, "CPU");
auto compiled_model = core.compile_model(xml_path, "CPU");
const auto inputShape = compiled_model.input().get_shape();
std::unordered_set<std::string> inputNames = compiled_model.input().get_names();
std::cout << "Input Tensor: " << *inputNames.begin() << " Shape: " << inputShape << std::endl;
std::vector<ov::Output<const ov::Node>> outputInfos = compiled_model.outputs();
for (auto& outputInfo : outputInfos)
{
std::unordered_set<std::string> outputNames = outputInfo.get_names();
const auto outputShape = outputInfo.get_shape();
std::cout << "Output Tensor: " << *outputNames.begin() << " Shape: " << outputShape << std::endl;
}
cv::Mat srcMat = cv::imread(img_path);
std::vector<float> paddings(3); //scale, half_h, half_w
std::vector<int> reshape = { 640, 640 };
cv::Mat resized_img = letterbox(srcMat, paddings, reshape);
// BGR->RGB, u8(0-255)->f32(0.0-1.0), HWC->NCHW
cv::Mat input_data = cv::dnn::blobFromImage(resized_img, 1 / 255.0, cv::Size(640, 640), cv::Scalar(0, 0, 0), true);
// Step 5. Create tensor from image
ov::Tensor input_tensor = ov::Tensor(compiled_model.input().get_element_type(), compiled_model.input().get_shape(), input_data.ptr(0));
// Step 6. Create an infer request for model inference
ov::InferRequest infer_request = compiled_model.create_infer_request();
infer_request.set_input_tensor(input_tensor);
infer_request.infer();
//Step 7. Retrieve inference results
const ov::Tensor& output_tensor = infer_request.get_tensor("output");
//const ov::Tensor& output_tensor = infer_request.get_output_tensor(1);
ov::Shape output_shape = output_tensor.get_shape();
std::cout << "Output Shape: " << output_shape << std::endl;
// Detect Matrix: 25200 x 8
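    // For a 640x640 input, 25200 = 3 anchors x (80*80 + 40*40 + 20*20) grid cells at strides 8/16/32;
    // 8 columns = 4 box coords (cx, cy, w, h) + 1 objectness + 3 class scores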
cv::Mat detect_buffer(output_shape[1], output_shape[2], CV_32F, output_tensor.data());
float conf_threshold = 0.25;
float nms_threshold = 0.5;
std::vector<cv::Rect> boxes;
std::vector<int> class_ids;
std::vector<float> class_scores;
std::vector<float> confidences;
    // layout per row: cx, cy, w, h, confidence, c1, c2, c3 (3 classes here)
float scale = paddings[0];
for (int i = 0; i < detect_buffer.rows; i++) {
float confidence = detect_buffer.at<float>(i, 4);
if (confidence < conf_threshold) {
continue;
}
cv::Mat classes_scores = detect_buffer.row(i).colRange(5, 8);
cv::Point class_id;
double score;
cv::minMaxLoc(classes_scores, NULL, &score, NULL, &class_id);
// class score: 0~1
if (score > 0.25)
{
float cx = detect_buffer.at<float>(i, 0);
float cy = detect_buffer.at<float>(i, 1);
float w = detect_buffer.at<float>(i, 2);
float h = detect_buffer.at<float>(i, 3);
int left = static_cast<int>((cx - 0.5 * w - paddings[2]) / scale);
int top = static_cast<int>((cy - 0.5 * h - paddings[1]) / scale);
int width = static_cast<int>(w / scale);
int height = static_cast<int>(h / scale);
cv::Rect box;
box.x = left;
box.y = top;
box.width = width;
box.height = height;
boxes.push_back(box);
class_ids.push_back(class_id.x);
class_scores.push_back(score);
confidences.push_back(confidence);
}
}
// NMS
std::vector<int> indices;
cv::dnn::NMSBoxes(boxes, confidences, conf_threshold, nms_threshold, indices);
// -------- Step 8. Visualize the detection results -----------
for (size_t i = 0; i < indices.size(); i++) {
int index = indices[i];
int class_id = class_ids[index];
cv::rectangle(srcMat, boxes[index], colors_map[class_id % 6], 2, 8);
        std::string label = class_names_Hawkeye[class_id] + ":" + std::to_string(class_scores[index]);
cv::putText(srcMat, label, cv::Point(boxes[index].tl().x, boxes[index].tl().y - 10), cv::FONT_HERSHEY_SIMPLEX, .5, colors_map[class_id % 6]);
}
cv::namedWindow("YOLOv5 OpenVINO Inference C++ Demo");
cv::imshow("YOLOv5 OpenVINO Inference C++ Demo", srcMat);
cv::waitKey(0);
cv::destroyAllWindows();
}
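OV_Det above is just a function; for reference, a minimal driver appended to the same file might look like the sketch below. The model and image paths are placeholders mirroring the Python demo.
int main() {
    std::string xml_path = "./model/yolov5.xml"; // placeholder model path
    std::string img_path = "./img/test.jpg";     // placeholder test image
    OV_Det(xml_path, img_path);
    return 0;
}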
C++ code and results (Inference Engine API)
void inference_Det(string& xml_path, string& bin_path, string& img_path) {
// Get OpenVINO runtime version
std::cout << ov::get_openvino_version().description << ':' << ov::get_openvino_version().buildNumber << std::endl;
// Step 1. Initialize OpenVINO Runtime Core
InferenceEngine::Core core;
// Step 2. Get list of available devices
std::vector<std::string> availableDevices = core.GetAvailableDevices();
// Step 3. Query and print supported metrics and config keys
if (availableDevices.empty()) {
std::cout << "No devices available." << std::endl;
}
else
{
std::cout << "Available devices: " << std::endl;
for (auto&& device : availableDevices) {
std::cout << device << std::endl;
}
}
    // Step 4. Read network structure and weights
InferenceEngine::CNNNetwork model = core.ReadNetwork(xml_path, bin_path);
    // Get input names and tensor info
InferenceEngine::InputsDataMap inputsInfo = model.getInputsInfo();
for (auto& inputInfo : inputsInfo) {
const std::string& inputName = inputInfo.first;
//std::cout << "Input Name: " << inputName << std::endl;
const InferenceEngine::DataPtr& inputData = inputInfo.second->getInputData();
const InferenceEngine::TensorDesc& inputDesc = inputData->getTensorDesc();
        // Get the tensor dimensions
const InferenceEngine::SizeVector& inputDims = inputDesc.getDims();
std::cout << "Input Tensor '" << inputName << "' Size: ";
for (size_t dim : inputDims) {
std::cout << dim << " ";
}
std::cout << std::endl;
}
    // Grab the first input (method one)
    auto item = inputsInfo.begin();
    const std::string& image_info_name = item->first; // name of the image input
    // Get output names and tensor info
InferenceEngine::OutputsDataMap outputsInfo = model.getOutputsInfo();
for (auto& outputInfo : outputsInfo) {
const std::string& outputName = outputInfo.first;
//std::cout << "Output Name: " << outputName << std::endl;
const InferenceEngine::DataPtr& outputData = outputInfo.second;
const InferenceEngine::TensorDesc& outputDesc = outputData->getTensorDesc();
        // Get the tensor dimensions
const InferenceEngine::SizeVector& outputDims = outputDesc.getDims();
std::cout << "Output Tensor '" << outputName << "' Size: ";
for (size_t dim : outputDims) {
std::cout << dim << " ";
}
std::cout << std::endl;
}
    // Step 5. Compile the model
InferenceEngine::ExecutableNetwork executableNetwork = core.LoadNetwork(model, "CPU");
    // Create the infer request
InferenceEngine::InferRequest infer_request = executableNetwork.CreateInferRequest();
cv::Mat srcMat = cv::imread(img_path);
    /* record the original image height and width */
auto original_height = srcMat.rows;
auto original_width = srcMat.cols;
cv::Size resizeShape(640, 640);
cv::Mat matNormImage;
    preDataDet(srcMat, resizeShape, matNormImage); // user helper: subtract mean, divide by std
    // Load the image into the model's input tensor
InferenceEngine::Blob::Ptr input_blob = infer_request.GetBlob(image_info_name);
const InferenceEngine::SizeVector input_dims = input_blob->getTensorDesc().getDims();
const size_t channels = input_dims[1];
const size_t height = input_dims[2];
const size_t width = input_dims[3];
    // Copy the image data into the input blob
InferenceEngine::LockedMemory<void> blobMapped = InferenceEngine::as<InferenceEngine::MemoryBlob>(input_blob)->wmap();
float* blob_data = blobMapped.as<float*>();
for (size_t h = 0; h < height; ++h) {
for (size_t w = 0; w < width; ++w) {
for (size_t c = 0; c < channels; ++c) {
blob_data[c * height * width + h * width + w] = matNormImage.at<float>(h, w * channels + c);
//blob_data[c * height * width + h * width + w] = matNormImage.at<cv::Vec<float, 3>>(h, w)[c];
}
}
}
infer_request.Infer();
    int img_w = 8;      // detection matrix columns: 4 box coords + 1 objectness + 3 class scores
    int img_h = 25200;  // detection matrix rows (number of predictions)
    // Retrieve inference results
    const float* output_buffer = nullptr; // will point into the output blob, so no allocation is needed
for (auto& item : outputsInfo) {
auto output_name = item.first;
if (output_name == "output")
{
auto output = infer_request.GetBlob(output_name);
auto const memLocker = output->cbuffer(); // use const memory locker
// output_buffer is valid as long as the lifetime of memLocker
output_buffer = memLocker.as<const float*>();
cv::Mat probImg = cv::Mat(cv::Size(img_w, img_h), CV_32FC1);
            // single image
memcpy(probImg.data, (const float*)output_buffer, (size_t)img_w * img_h * sizeof(float));
//std::cout << probImg.rows << std::endl;
std::vector<int> candi;
for (size_t y = 0; y < probImg.rows; ++y)
{
float tmpVal = probImg.at<float>(y, 4);
//std::cout << y << " " << tmpVal << std::endl;
if (tmpVal > 0.7)
{
std::cout << y << " " << tmpVal << std::endl;
candi.push_back(y);
}
}
//Detections matrix nx6 (xyxy, conf, cls)
std::vector<TDetectBbox> all_box;
for (size_t g = 0; g < candi.size(); ++g)
{
                float* ptr = probImg.ptr<float>(candi[g]); // pointer to the start of this candidate row
TDetectBbox candi_single;
std::vector<float> score;
                for (size_t m = 0; m < 3; ++m) // 3 classes: collect per-class scores
{
float _temVal = ptr[m + 5];
score.push_back(_temVal);
}
                // find the max score and its index
                float _score{ 0.0 };
                int classIndx = findVectorMax(score, _score); // best class and its score
                candi_single.x0 = ptr[0] - ptr[2] / 2; // convert xywh (center, size) to xyxy (top-left, bottom-right)
candi_single.y0 = ptr[1] - ptr[3] / 2;
candi_single.x1 = ptr[0] + ptr[2] / 2;
candi_single.y1 = ptr[1] + ptr[3] / 2;
                candi_single.confidence = ptr[4] * _score; // objectness * class score
candi_single.classIndx = classIndx;
all_box.push_back(candi_single);
}
            // run NMS
nms(all_box, 0.4);
            // compute the coordinate mapping back to the original image
int shift_x{ 0 }, shift_y{ 0 };
float scale_x{ .0f }, scale_y{ .0f };
            // a plain resize is assumed here (no letterbox padding to undo)
scale_x = (float)width / original_width;
scale_y = (float)height / original_height;
for (size_t i = 0; i < all_box.size(); i++)
{
if (all_box[i].confidence > 0.7)
{
int x1 = Max(0, (int)(all_box[i].x0 - shift_x) / scale_x);
int y1 = Max(0, (int)(all_box[i].y0 - shift_y) / scale_y);
int x2 = Min(original_width, (int)(all_box[i].x1 - shift_x) / scale_x);
int y2 = Min(original_height, (int)(all_box[i].y1 - shift_y) / scale_y);
cv::rectangle(srcMat, cv::Rect(x1, y1, x2 - x1, y2 - y1), cv::Scalar(255, 255, 255));
std::cout << "confidence: " << all_box[i].confidence << std::endl;
}
}
cv::imshow("Image", srcMat);
cv::waitKey(0);
}
}
}
OpenVINO semantic segmentation inference (ENet)
Python code and results
import openvino.runtime as ov
import cv2
import numpy as np
import matplotlib.pyplot as plt
if __name__ == '__main__':
    # 1. Create the OpenVINO Core
core = ov.Core()
devices = core.available_devices
for device in devices:
device_name = core.get_property(device, "FULL_DEVICE_NAME")
print(f"{device}: {device_name}")
    # 1b. Load custom ops if needed; OpenVINO's extension API is the recommended way to write custom operators
    # (usually not needed)
    # core.add_extension("path_to_extension_library.so")
    # 2. Read the model from disk
model_path = "./model/ENet.xml"
model = core.read_model(model_path)
    # 3. Compile and load the model onto the device
    # Steps 1-2 only read the model file; it must be compiled before it can run
compiled_model = core.compile_model(model, "CPU")
print(compiled_model)
for i in range(len(compiled_model.inputs)):
print(compiled_model.inputs[i].names, compiled_model.inputs[i].shape)
for i in range(len(compiled_model.outputs)):
print(compiled_model.outputs[i].names, compiled_model.outputs[i].shape)
print("-------------------------------------")
    # 4. Create the infer request
infer_request = compiled_model.create_infer_request()
    # 5. Fill in input data
    # 5.1 Get the input tensor by index
input_tensor1 = infer_request.get_input_tensor(0)
# assert input_tensor1.data.dtype == np.int64
    # 5.2 The tensor can also be fetched by name; this network has a single input, named "images",
    # so both calls refer to the same underlying tensor (the Python wrappers are distinct objects, though)
    input_tensor2 = infer_request.get_tensor("images")
    assert input_tensor2 is not input_tensor1  # distinct wrapper objects around the same tensor
print("-------------------------------------")
    # 6. Read the image and preprocess: scale to [0, 1], HWC -> CHW, float32
image_path = "./image/test.jpg"
    images = cv2.imdecode(np.fromfile(image_path, dtype=np.uint8), cv2.IMREAD_COLOR)  # raw file bytes must be read as uint8, not float32
images = cv2.resize(images, (256, 256), interpolation=cv2.INTER_LINEAR)
images = images / 255.
images = images.transpose((2, 0, 1))
images = np.array(images, np.float32)
    x = np.expand_dims(images, 0)  # add batch dimension
print("-------------------------------------")
res = infer_request.infer(inputs={"images": x})
for key in res.keys():
print(res[key].shape)
    # 6.2 Second way to fetch results: get_output_tensor()
output_tensor = infer_request.get_output_tensor(1)
print(output_tensor.data.shape)
plt.imshow(output_tensor.data[0], "gray")
plt.colorbar(label='max value test')
plt.show()
print("hello world")
C++ code and results (OpenVINO Runtime API 2.0)
#pragma once
#include <opencv2/dnn.hpp>
#include <openvino/openvino.hpp>
#include <opencv2/opencv.hpp>
using namespace std;
void OV_Seg(string& xml_path, string& img_path) {
// Get OpenVINO runtime version
std::cout << ov::get_openvino_version().description << ':' << ov::get_openvino_version().buildNumber << std::endl;
// Step 1. Initialize OpenVINO Runtime Core
ov::Core core;
// Step 2. Get list of available devices
std::vector<std::string> availableDevices = core.get_available_devices();
// Step 3. Query and print supported metrics and config keys
if (availableDevices.empty()) {
std::cout << "No devices available." << std::endl;
}
else
{
std::cout << "Available devices: " << std::endl;
for (auto&& device : availableDevices) {
std::cout << device << std::endl;
}
}
    // Step 4. Read network structure and weights
// std::shared_ptr<ov::Model> model = core.read_model(xml_path);
// ov::CompiledModel compiled_model = core.compile_model(model, "CPU");
auto compiled_model = core.compile_model(xml_path, "CPU");
ov::InferRequest infer_request = compiled_model.create_infer_request();
const auto inputShape = compiled_model.input().get_shape();
std::unordered_set<std::string> inputNames = compiled_model.input().get_names();
std::cout << "Input Tensor: " << *inputNames.begin() << " Shape: " << inputShape << std::endl;
std::vector<ov::Output<const ov::Node>> outputInfos = compiled_model.outputs();
for (auto& outputInfo : outputInfos)
{
std::unordered_set<std::string> outputNames = outputInfo.get_names();
const auto outputShape = outputInfo.get_shape();
std::cout << "Output Tensor: " << *outputNames.begin() << " Shape: " << outputShape << std::endl;
}
cv::Mat srcMat = cv::imread(img_path);
auto original_height = srcMat.rows;
auto original_width = srcMat.cols;
cv::Size resizeShape(256, 256);
cv::Mat matNormImage;
    preData(srcMat, resizeShape, matNormImage); // user helper: subtract mean, divide by std
ov::Tensor input_tensor = infer_request.get_input_tensor();
float* const input_data = input_tensor.data<float>();
for (int batch = 0; batch < inputShape[0]; batch++)
{
for (int h = 0; h < inputShape[2]; h++)
{
for (int w = 0; w < inputShape[3]; w++)
{
for (int c = 0; c < inputShape[1]; c++)
{
int out_index = batch * inputShape[1] * inputShape[2] * inputShape[3] + c * inputShape[2] * inputShape[3] + h * inputShape[3] + w;
input_data[out_index] = matNormImage.at<cv::Vec<float, 3>>(h, w)[c];
}
}
}
}
    // Step 6. Run inference (the infer request was already created above)
infer_request.infer();
//Step 7. Retrieve inference results
//const ov::Tensor& output_tensor = infer_request.get_tensor("argIdxMapImg");
const ov::Tensor& output_tensor = infer_request.get_output_tensor(1);
ov::Shape output_shape = output_tensor.get_shape();
std::cout << "Output Shape: " << output_shape << std::endl;
const float* result = output_tensor.data<const float>();
vector<cv::Mat> masks;
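    // The second output is assumed here to be a per-pixel class-index map of shape [N, H, W]
    // (hence the tensor name "argIdxMapImg" above); each value is stored as an 8-bit class id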
for (int batch = 0; batch < output_shape[0]; batch++)
{
cv::Mat mask = cv::Mat::zeros(output_shape[1], output_shape[2], CV_8UC1);
for (int h = 0; h < output_shape[1]; h++)
{
for (int w = 0; w < output_shape[2]; w++)
{
                int out_index = batch * output_shape[1] * output_shape[2] + h * output_shape[2] + w;
                float out_value = result[out_index];
                mask.at<uchar>(h, w) = static_cast<uchar>(out_value);
}
}
masks.push_back(mask);
cv::namedWindow("Image", cv::WINDOW_NORMAL);
cv::imshow("Image", mask*255);
cv::waitKey(0);
cv::destroyAllWindows();
}
}
C++ code and results (Inference Engine API)
#include <iostream>
#include <algorithm>
#include <cmath>
#include <cstdlib>
#include <fstream>
#include <sstream>
#include <opencv2/opencv.hpp>
#include "openvino/openvino.hpp"
#include <ie_core.hpp>
using namespace std;
void inference_seg(string& xml_path, string& bin_path, string& img_path) {
// Get OpenVINO runtime version
std::cout << ov::get_openvino_version().description << ':' << ov::get_openvino_version().buildNumber << std::endl;
// Step 1. Initialize OpenVINO Runtime Core
InferenceEngine::Core core;
// Step 2. Get list of available devices
std::vector<std::string> availableDevices = core.GetAvailableDevices();
// Step 3. Query and print supported metrics and config keys
if (availableDevices.empty()) {
std::cout << "No devices available." << std::endl;
}
else
{
std::cout << "Available devices: " << std::endl;
for (auto&& device : availableDevices) {
std::cout << device << std::endl;
}
}
    // Step 4. Read network structure and weights
InferenceEngine::CNNNetwork model = core.ReadNetwork(xml_path, bin_path);
    // Get input names and tensor info
InferenceEngine::InputsDataMap inputsInfo = model.getInputsInfo();
for (auto& inputInfo : inputsInfo) {
const std::string& inputName = inputInfo.first;
//std::cout << "Input Name: " << inputName << std::endl;
const InferenceEngine::DataPtr& inputData = inputInfo.second->getInputData();
const InferenceEngine::TensorDesc& inputDesc = inputData->getTensorDesc();
        // Get the tensor dimensions
const InferenceEngine::SizeVector& inputDims = inputDesc.getDims();
std::cout << "Input Tensor '" << inputName << "' Size: ";
for (size_t dim : inputDims) {
std::cout << dim << " ";
}
std::cout << std::endl;
}
    // Grab the first input (method one)
    auto item = inputsInfo.begin();
    const std::string& image_info_name = item->first; // name of the image input
    // Get output names and tensor info
InferenceEngine::OutputsDataMap outputsInfo = model.getOutputsInfo();
for (auto& outputInfo : outputsInfo) {
const std::string& outputName = outputInfo.first;
//std::cout << "Output Name: " << outputName << std::endl;
const InferenceEngine::DataPtr& outputData = outputInfo.second;
const InferenceEngine::TensorDesc& outputDesc = outputData->getTensorDesc();
        // Get the tensor dimensions
const InferenceEngine::SizeVector& outputDims = outputDesc.getDims();
std::cout << "Output Tensor '" << outputName << "' Size: ";
for (size_t dim : outputDims) {
std::cout << dim << " ";
}
std::cout << std::endl;
}
    // Step 5. Compile the model
InferenceEngine::ExecutableNetwork executableNetwork = core.LoadNetwork(model, "CPU");
    // Create the infer request
InferenceEngine::InferRequest infer_request = executableNetwork.CreateInferRequest();
cv::Mat srcMat = cv::imread(img_path);
cv::Size resizeShape(256, 256);
cv::Mat matNormImage;
    preDataDet(srcMat, resizeShape, matNormImage); // user helper: subtract mean, divide by std
    // Load the image into the model's input tensor
InferenceEngine::Blob::Ptr input_blob = infer_request.GetBlob(image_info_name);
const InferenceEngine::SizeVector input_dims = input_blob->getTensorDesc().getDims();
const size_t channels = input_dims[1];
const size_t height = input_dims[2];
const size_t width = input_dims[3];
    // Copy the image data into the input blob
InferenceEngine::LockedMemory<void> blobMapped = InferenceEngine::as<InferenceEngine::MemoryBlob>(input_blob)->wmap();
float* blob_data = blobMapped.as<float*>();
for (size_t h = 0; h < height; ++h) {
for (size_t w = 0; w < width; ++w) {
for (size_t c = 0; c < channels; ++c) {
blob_data[c * height * width + h * width + w] = matNormImage.at<float>(h, w * channels + c);
//blob_data[c * height * width + h * width + w] = matNormImage.at<cv::Vec<float, 3>>(h, w)[c];
}
}
}
infer_request.Infer();
    // Retrieve inference results
    const float* output_buffer = nullptr; // will point into the output blob, so no allocation is needed
for (auto& item : outputsInfo) {
auto output_name = item.first;
if (output_name == "argIdxMapImg")
{
auto output = infer_request.GetBlob(output_name);
auto const memLocker = output->cbuffer(); // use const memory locker
// output_buffer is valid as long as the lifetime of memLocker
output_buffer = memLocker.as<const float*>();
cv::Mat probImg = cv::Mat(cv::Size(width, height), CV_32FC1);
            // single image
memcpy(probImg.data, (const float*)output_buffer, (size_t)width * height * sizeof(float));
cv::imshow("Image", probImg * 255);
cv::waitKey(0);
}
}
}
Problem log:
The OpenVINO Python demo runs correctly, but the C++ side fails to detect the CPU device.
(Resolved)
This kind of problem is almost always a mismatch between the OpenVINO version configured in the project properties and the version of the dynamic libraries in the output folder (a low-level mistake, watch out for it).
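A quick way to catch this kind of mismatch early is to print, at startup, the version of the runtime the executable actually loaded (the same call the demos above already make) and compare it against the OpenVINO dynamic libraries sitting next to the executable. A minimal sketch:
#include <openvino/openvino.hpp>
#include <iostream>

int main() {
    // Prints the description and build number of the OpenVINO runtime that was actually
    // loaded at run time; if this differs from the SDK version set in the project
    // properties, the dynamic libraries in the output folder come from another install.
    std::cout << ov::get_openvino_version().description << ':'
              << ov::get_openvino_version().buildNumber << std::endl;
    return 0;
}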
Follow-up:
I will keep recording OpenVINO usage and issues across different task scenarios.