Keyword arguments:
local_path -- 本地文件存储路径
link -- 需要下载的url
checksum_reference -- expected MD5 checksum of the file
"""</span>
<span class="hljs-keyword">if</span> <span class="hljs-keyword">not</span> os.path.exists(local_path):
print(<span class="hljs-string">'Downloading from %s, this may take a while...'</span> % link)
wget.download(link, local_path)
print()
<span class="hljs-keyword">if</span> checksum_reference <span class="hljs-keyword">is</span> <span class="hljs-keyword">not</span> <span class="hljs-keyword">None</span>:
checksum = generate_md5_checksum(local_path)
<span class="hljs-keyword">if</span> checksum != checksum_reference:
<span class="hljs-keyword">raise</span> ValueError(
<span class="hljs-string">'The MD5 checksum of local file %s differs from %s, please manually remove \
the file and try again.'</span> %
(local_path, checksum_reference))
<span class="hljs-keyword">return</span> local_path
defmain():
<span class="hljs-string">"""Run the DarkNet-to-ONNX conversion for YOLOv3-608."""</span>
<span class="hljs-comment"># 注释掉下面的部分,</span>
# if sys.version_info[0] > 2: # raise Exception(“This is script is only compatible with python2, please re-run this script </span> # with python2. The rest of this sample can be run with either version of python”)
def draw_bboxes(image_raw, bboxes, confidences, categories, all_categories, bbox_color='blue'):
    """Draw the bounding boxes on the original input image and return it.

    Keyword arguments:
    image_raw -- a raw PIL Image
    bboxes -- NumPy array containing the bounding box coordinates of N objects, with shape (N,4).
    confidences -- NumPy array containing the corresponding confidence for each object,
    with shape (N,)
    categories -- NumPy array containing the corresponding category for each object,
    with shape (N,)
    all_categories -- a list of all categories in the correct ordered (required for looking up
    the category name)
    bbox_color -- an optional string specifying the color of the bounding boxes (default: 'blue')
    """
    draw = ImageDraw.Draw(image_raw)
    print(bboxes, confidences, categories)
    for box, score, category in zip(bboxes, confidences, categories):
        x_coord, y_coord, width, height = box
        # Round to the nearest pixel and clamp the box to the image boundaries.
        left = max(0, np.floor(x_coord + 0.5).astype(int))
        top = max(0, np.floor(y_coord + 0.5).astype(int))
        right = min(image_raw.width, np.floor(x_coord + width + 0.5).astype(int))
        bottom = min(image_raw.height, np.floor(y_coord + height + 0.5).astype(int))
        draw.rectangle(((left, top), (right, bottom)), outline=bbox_color)
        # Label is drawn just above the top-left corner of the box.
        draw.text((left, top - 12), '{0} {1:.2f}'.format(all_categories[category], score), fill=bbox_color)
    return image_raw
def get_engine(onnx_file_path, engine_file_path=""):
    """Return a TensorRT engine: deserialize an existing serialized engine if one
    is present at engine_file_path, otherwise build a new engine from the ONNX
    file and save it.

    Keyword arguments:
    onnx_file_path -- path to the ONNX model to build the engine from
    engine_file_path -- path of the (possibly pre-existing) serialized engine
    """
    # Closure so the builder only runs when no serialized engine exists.
    def build_engine():
        """Takes an ONNX file and creates a TensorRT engine to run inference with."""
        with trt.Builder(TRT_LOGGER) as builder, builder.create_network() as network, trt.OnnxParser(network, TRT_LOGGER) as parser:
            builder.max_workspace_size = 1 << 30  # 1GB
            builder.max_batch_size = 1
            # Parse the model file.
            if not os.path.exists(onnx_file_path):
                print('ONNX file {} not found, please run yolov3_to_onnx.py first to generate it.'.format(onnx_file_path))
                exit(0)
            print('Loading ONNX file from path {}...'.format(onnx_file_path))
            with open(onnx_file_path, 'rb') as model:
                print('Beginning ONNX file parsing')
                parser.parse(model.read())
            print('Completed parsing of ONNX file')
            print('Building an engine from file {}; this may take a while...'.format(onnx_file_path))
            engine = builder.build_cuda_engine(network)
            print("Completed creating Engine")
            # Serialize the freshly built engine so the next run can skip building.
            with open(engine_file_path, "wb") as f:
                f.write(engine.serialize())
            return engine

    if os.path.exists(engine_file_path):
        # A serialized engine already exists, so skip the build step entirely.
        print("Reading engine from file {}".format(engine_file_path))
        with open(engine_file_path, "rb") as f, \
                trt.Runtime(TRT_LOGGER) as runtime:
            return runtime.deserialize_cuda_engine(f.read())
    else:
        return build_engine()
defmain(): “”“Create a TensorRT engine for ONNX-based YOLOv3-608 and run inference.”""
import math
from PIL import Image
import numpy as np
# YOLOv3-608 has been trained with these 80 categories from COCO: # Lin, Tsung-Yi, et al. “Microsoft COCO: Common Objects in Context.” # European Conference on Computer Vision. Springer, Cham, 2014.
def load_label_categories(label_file_path):
    """Read the category names (one per line) from the label file and return them
    as a list, in file order.

    Keyword argument:
    label_file_path -- path to a text file with one category name per line
    """
    # Use a context manager so the file handle is closed deterministically
    # (the original left the handle for the garbage collector to close).
    with open(label_file_path) as label_file:
        return [line.rstrip('\n') for line in label_file]
<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">__init__</span><span class="hljs-params">(self,
yolo_masks,
yolo_anchors,
obj_threshold,
nms_threshold,
yolo_input_resolution)</span>:</span>
<span class="hljs-string">"""Initialize with all values that will be kept when processing several frames.
Assuming 3 outputs of the network in the case of (large) YOLOv3.
Keyword arguments:
yolo_masks -- a list of 3 three-dimensional tuples for the YOLO masks
yolo_anchors -- a list of 9 two-dimensional tuples for the YOLO anchors
object_threshold -- threshold for object coverage, float value between 0 and 1
nms_threshold -- threshold for non-max suppression algorithm,
float value between 0 and 1
input_resolution_yolo -- two-dimensional tuple with the target network's (spatial)
input resolution in HW order
"""</span>
self.masks = yolo_masks
self.anchors = yolo_anchors
self.object_threshold = obj_threshold
self.nms_threshold = nms_threshold
self.input_resolution_yolo = yolo_input_resolution
def process(self, outputs, resolution_raw):
    """Take the YOLOv3 outputs generated from a TensorRT forward pass, post-process them
    and return a list of bounding boxes for detected object together with their category
    and their confidences in separate lists.

    Keyword arguments:
    outputs -- outputs from a TensorRT engine in NCHW format
    resolution_raw -- the original spatial resolution from the input PIL image in WH order
    """
    # Bring every raw output tensor into (height, width, 3, 85) layout first,
    # then run the full decode + filter + NMS pipeline over them.
    reshaped = [self._reshape_output(raw) for raw in outputs]
    return self._process_yolo_output(reshaped, resolution_raw)
def _reshape_output(self, output):
    """Reshape a TensorRT output from NCHW to NHWC format (with expected C=255),
    and then return it in (height,width,3,85) dimensionality after further reshaping.

    Keyword argument:
    output -- an output from a TensorRT engine after inference
    """
    # NCHW -> NHWC so the channel axis can be split per anchor.
    nhwc = np.transpose(output, [0, 2, 3, 1])
    _, grid_height, grid_width, _ = nhwc.shape
    # Each grid cell holds 3 anchors; every anchor carries
    # 4 box coords + 1 objectness score + CATEGORY_NUM (=80) class scores.
    return np.reshape(nhwc, (grid_height, grid_width, 3, 4 + 1 + CATEGORY_NUM))
def _process_yolo_output(self, outputs_reshaped, resolution_raw):
    """Take in a list of three reshaped YOLO outputs in (height,width,3,85) shape and
    return a list of bounding boxes for detected object together with their category and
    their confidences in separate lists.

    Keyword arguments:
    outputs_reshaped -- list of three reshaped YOLO outputs as NumPy arrays
    with shape (height,width,3,85)
    resolution_raw -- the original spatial resolution from the input PIL image in WH order
    """
    # E.g. in YOLOv3-608 there are three output tensors, each paired with its own
    # mask. Decode every output-mask pair into box/category/confidence candidates
    # and drop the ones below the object threshold.
    detections = []
    for feature_map, mask in zip(outputs_reshaped, self.masks):
        raw = self._process_feats(feature_map, mask)
        detections.append(self._filter_boxes(*raw))
    boxes = np.concatenate([d[0] for d in detections])
    categories = np.concatenate([d[1] for d in detections])
    confidences = np.concatenate([d[2] for d in detections])
    # Scale boxes back to original image shape:
    img_width, img_height = resolution_raw
    boxes = boxes * [img_width, img_height, img_width, img_height]
    # Apply non-max suppression per category so adjacent candidate boxes of the
    # same class collapse into a single box.
    nms_boxes, nms_categories, nms_scores = [], [], []
    for class_id in set(categories):
        idxs = np.where(categories == class_id)
        keep = self._nms_boxes(boxes[idxs], confidences[idxs])
        nms_boxes.append(boxes[idxs][keep])
        nms_categories.append(categories[idxs][keep])
        nms_scores.append(confidences[idxs][keep])
    if not nms_categories and not nms_scores:
        # Nothing survived filtering: signal "no detections" to the caller.
        return None, None, None
    return (np.concatenate(nms_boxes),
            np.concatenate(nms_categories),
            np.concatenate(nms_scores))
<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">_process_feats</span><span class="hljs-params">(self, output_reshaped, mask)</span>:</span>
<span class="hljs-string">"""Take in a reshaped YOLO output in height,width,3,85 format together with its
corresponding YOLO mask and return the detected bounding boxes, the confidence,
and the class probability in each cell/pixel.
Keyword arguments:
output_reshaped -- reshaped YOLO output as NumPy arrays with shape (height,width,3,85)
mask -- 2-dimensional tuple with mask specification for this output
"""</span>
<span class="hljs-comment"># Two in-line functions required for calculating the bounding box</span>
<span class="hljs-comment"># descriptors:</span>
<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">sigmoid</span><span class="hljs-params">(value)</span>:</span>
<span class="hljs-string">"""Return the sigmoid of the input."""</span>
<span class="hljs-keyword">return</span> <span class="hljs-number">1.0</span> / (<span class="hljs-number">1.0</span> + math.exp(-value))
<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">exponential</span><span class="hljs-params">(value)</span>:</span>
<span class="hljs-string">"""Return the exponential of the input."""</span>
<span class="hljs-keyword">return</span> math.exp(value)
<span class="hljs-comment"># Vectorized calculation of above two functions:</span>
sigmoid_v = np.vectorize(sigmoid)
exponential_v = np.vectorize(exponential)
grid_h, grid_w, _, _ = output_reshaped.shape
anchors = [self.anchors[i] <span class="hljs-keyword">for</span> i <span class="hljs-keyword">in</span> mask]
<span class="hljs-comment"># Reshape to N, height, width, num_anchors, box_params:</span>
anchors_tensor = np.reshape(anchors, [<span class="hljs-number">1</span>, <span class="hljs-number">1</span>, len(anchors), <span class="hljs-number">2</span>])
box_xy = sigmoid_v(output_reshaped[..., :<span class="hljs-number">2</span>])
box_wh = exponential_v(output_reshaped[..., <span class="hljs-number">2</span>:<span class="hljs-number">4</span>]) * anchors_tensor
box_confidence = sigmoid_v(output_reshaped[..., <span class="hljs-number">4</span>])
box_confidence = np.expand_dims(box_confidence, axis=<span class="hljs-number">-1</span>)
box_class_probs = sigmoid_v(output_reshaped[..., <span class="hljs-number">5</span>:])
col = np.tile(np.arange(<span class="hljs-number">0</span>, grid_w), grid_w).reshape(<span class="hljs-number">-1</span>, grid_w)
row = np.tile(np.arange(<span class="hljs-number">0</span>, grid_h).reshape(<span class="hljs-number">-1</span>, <span class="hljs-number">1</span>), grid_h)
col = col.reshape(grid_h, grid_w, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>).repeat(<span class="hljs-number">3</span>, axis=<span class="hljs-number">-2</span>)
row = row.reshape(grid_h, grid_w, <span class="hljs-number">1</span>, <span class="hljs-number">1</span>).repeat(<span class="hljs-number">3</span>, axis=<span class="hljs-number">-2</span>)
grid = np.concatenate((col, row), axis=<span class="hljs-number">-1</span>)
box_xy += grid
box_xy /= (grid_w, grid_h)
box_wh /= self.input_resolution_yolo
box_xy -= (box_wh / <span class="hljs-number">2.</span>)
boxes = np.concatenate((box_xy, box_wh), axis=<span class="hljs-number">-1</span>)
<span class="hljs-comment"># boxes: centroids, box_confidence: confidence level, box_class_probs:</span>
<span class="hljs-comment"># class confidence</span>
<span class="hljs-keyword">return</span> boxes, box_confidence, box_class_probs
<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">_filter_boxes</span><span class="hljs-params">(self, boxes, box_confidences, box_class_probs)</span>:</span>
<span class="hljs-string">"""Take in the unfiltered bounding box descriptors and discard each cell
whose score is lower than the object threshold set during class initialization.
Keyword arguments:
boxes -- bounding box coordinates with shape (height,width,3,4); 4 for
x,y,height,width coordinates of the boxes
box_confidences -- bounding box confidences with shape (height,width,3,1); 1 for as
confidence scalar per element
box_class_probs -- class probabilities with shape (height,width,3,CATEGORY_NUM)
"""</span>
box_scores = box_confidences * box_class_probs
box_classes = np.argmax(box_scores, axis=<span class="hljs-number">-1</span>)
box_class_scores = np.max(box_scores, axis=<span class="hljs-number">-1</span>)
pos = np.where(box_class_scores >= self.object_threshold)
boxes = boxes[pos]
classes = box_classes[pos]
scores = box_class_scores[pos]
<span class="hljs-keyword">return</span> boxes, classes, scores
<span class="hljs-function"><span class="hljs-keyword">def</span> <span class="hljs-title">_nms_boxes</span><span class="hljs-params">(self, boxes, box_confidences)</span>:</span>
<span class="hljs-string">"""Apply the Non-Maximum Suppression (NMS) algorithm on the bounding boxes with their
confidence scores and return an array with the indexes of the bounding boxes we want to
keep (and display later).
Keyword arguments:
boxes -- a NumPy array containing N bounding-box coordinates that survived filtering,
with shape (N,4); 4 for x,y,height,width coordinates of the boxes
box_confidences -- a Numpy array containing the corresponding confidences with shape N
"""</span>
x_coord = boxes[:, <span class="hljs-number">0</span>]
y_coord = boxes[:, <span class="hljs-number">1</span>]
width = boxes[:, <span class="hljs-number">2</span>]
height = boxes[:, <span class="hljs-number">3</span>]
areas = width * height
ordered = box_confidences.argsort()[::<span class="hljs-number">-1</span>]
keep = list()
<span class="hljs-keyword">while</span> ordered.size > <span class="hljs-number">0</span>:
<span class="hljs-comment"># Index of the current element:</span>
i = ordered[<span class="hljs-number">0</span>]
keep.append(i)
xx1 = np.maximum(x_coord[i], x_coord[ordered[<span class="hljs-number">1</span>:]])
yy1 = np.maximum(y_coord[i], y_coord[ordered[<span class="hljs-number">1</span>:]])
xx2 = np.minimum(x_coord[i] + width[i], x_coord[ordered[<span class="hljs-number">1</span>:]] + width[ordered[<span class="hljs-number">1</span>:]])
yy2 = np.minimum(y_coord[i] + height[i], y_coord[ordered[<span class="hljs-number">1</span>:]] + height[ordered[<span class="hljs-number">1</span>:]])
width1 = np.maximum(<span class="hljs-number">0.0</span>, xx2 - xx1 + <span class="hljs-number">1</span>)
height1 = np.maximum(<span class="hljs-number">0.0</span>, yy2 - yy1 + <span class="hljs-number">1</span>)
intersection = width1 * height1
union = (areas[i] + areas[ordered[<span class="hljs-number">1</span>:]] - intersection)
<span class="hljs-comment"># Compute the Intersection over Union (IoU) score:</span>
iou = intersection / union
<span class="hljs-comment"># The goal of the NMS algorithm is to reduce the number of adjacent bounding-box</span>
<span class="hljs-comment"># candidates to a minimum. In this step, we keep only those elements whose overlap</span>
<span class="hljs-comment"># with the current bounding box is lower than the threshold:</span>
indexes = np.where(iou <= self.nms_threshold)[<span class="hljs-number">0</span>]
ordered = ordered[indexes + <span class="hljs-number">1</span>]
keep = np.array(keep)
<span class="hljs-keyword">return</span> keep