YOLO 后处理nms的纯C++实现，不依托任何的库

豆浩宇

已于 2025-10-27 17:28:08 修改

阅读量413

点赞数 9

CC 4.0 BY-SA版权

文章标签： YOLO c++ 开发语言 算法 计算机视觉 图像处理 人工智能

于 2025-10-27 17:26:09 首次发布

本文链接：https://blog.csdn.net/Douhaoyu/article/details/153976146

1.前言：YOLO 模型先转 ONNX、再转 TensorRT 的过程中，可以直接启用 cuda 自带的 NMS，这样 YOLO 的 C++ 部署代码就不需要再单独对推理结果做 NMS 了。只需要在导出时加上 --end2end 参数，同时设置默认的置信度阈值 conf_thres 和 NMS 的 IoU 阈值 iou_thres：
parser.add_argument("--conf_thres", default=0.5, type=float,
                    help="The conf threshold for the nms, default: 0.5")
parser.add_argument("--iou_thres", default=0.5, type=float,
                    help="The iou threshold for the nms, default: 0.5")

1.pt2onnx
python pt2onnx.py
2.onnx2trt
python export.py -o .\best.onnx -e yolo12m.trt --end2end --v8 -p fp32
2.实现代码

#include <algorithm>
#include <chrono>
#include <cmath>
#include <cstddef>
#include <iostream>
#include <random>
#include <string>
#include <unordered_map>
#include <vector>

// Axis-aligned bounding box describing one detection.
//
// (x1, y1) and (x2, y2) are the two corners; calculate_iou uses (x2 - x1) and
// (y2 - y1) as the box width and height. Every member defaults to zero so a
// default-constructed BBox never exposes indeterminate values.
struct BBox {
    float x1 = 0.0f;     // x coordinate of the first corner
    float y1 = 0.0f;     // y coordinate of the first corner
    float x2 = 0.0f;     // x coordinate of the opposite corner
    float y2 = 0.0f;     // y coordinate of the opposite corner
    float score = 0.0f;  // ranking value: NMS processes boxes from highest to lowest score
    int class_id = 0;    // class label; NMS only compares boxes that share a label
    int index = 0;       // caller-assigned identifier reported back by the NMS functions
};

// Intersection-over-Union (IoU) of two boxes.
//
// Returns the overlap area divided by the union area, or 0 when the union
// area is not positive.
inline float calculate_iou(const BBox& a, const BBox& b) {
    // Overlap extent along each axis, clamped to zero when the boxes are disjoint.
    const float overlap_w = std::max(0.0f, std::min(a.x2, b.x2) - std::max(a.x1, b.x1));
    const float overlap_h = std::max(0.0f, std::min(a.y2, b.y2) - std::max(a.y1, b.y1));
    const float overlap = overlap_w * overlap_h;

    const float size_a = (a.x2 - a.x1) * (a.y2 - a.y1);
    const float size_b = (b.x2 - b.x1) * (b.y2 - b.y1);

    // Union = sum of both areas minus the part counted twice.
    const float combined = size_a + size_b - overlap;
    if (combined > 0) {
        return overlap / combined;
    }
    return 0.0f;
}

// 单类别 NMS 实现
std::vector<int> efficient_nms(std::vector<BBox>& boxes, float iou_threshold, int max_output = -1) {
    if (boxes.empty()) return {};
    
    // 按得分降序排序
    std::sort(boxes.begin(), boxes.end(), [](const BBox& a, const BBox& b) {
        return a.score > b.score;
    });
    
    std::vector<int> keep_indices;
    std::vector<bool> suppressed(boxes.size(), false);
    
    for (int i = 0; i < boxes.size(); ++i) {
        if (suppressed[i]) continue;
        
        keep_indices.push_back(boxes[i].index);
        if (max_output > 0 && static_cast<int>(keep_indices.size()) >= max_output) break;
        
        for (int j = i + 1; j < boxes.size(); ++j) {
            if (suppressed[j]) continue;
            if (boxes[i].class_id != boxes[j].class_id) continue;
            
            if (calculate_iou(boxes[i], boxes[j]) > iou_threshold) {
                suppressed[j] = true;
            }
        }
    }
    
    return keep_indices;
}

// 多类别 NMS 实现
std::vector<int> multiclass_nms(std::vector<BBox>& boxes, float iou_threshold, int max_output_per_class = -1) {
    // 使用映射按类别分组
    std::unordered_map<int, std::vector<BBox>> class_groups;
    for (auto& box : boxes) {
        class_groups[box.class_id].push_back(box);
    }
    
    std::vector<int> keep_indices;
    for (auto& group : class_groups) {
        auto& class_boxes = group.second;
        auto class_keep = efficient_nms(class_boxes, iou_threshold, max_output_per_class);
        keep_indices.insert(keep_indices.end(), class_keep.begin(), class_keep.end());
    }
    
    return keep_indices;
}

// 模拟 YOLO 模型输出
std::vector<BBox> generate_yolo_output(int num_boxes, int num_classes) {
    std::vector<BBox> boxes;
    std::random_device rd;
    std::mt19937 gen(rd());
    std::uniform_real_distribution<float> coord_dist(0.0f, 1.0f);
    std::uniform_real_distribution<float> score_dist(0.1f, 0.99f);
    std::uniform_int_distribution<int> class_dist(0, num_classes - 1);
    
    for (int i = 0; i < num_boxes; ++i) {
        float x1 = coord_dist(gen);
        float y1 = coord_dist(gen);
        float w = coord_dist(gen) * 0.2f;
        float h = coord_dist(gen) * 0.2f;
        
        BBox box;
        box.x1 = x1;
        box.y1 = y1;
        box.x2 = x1 + w;
        box.y2 = y1 + h;
        box.score = score_dist(gen);
        box.class_id = class_dist(gen);
        box.index = i; // 保存原始索引
        
        boxes.push_back(box);
    }
    
    return boxes;
}

// 打印边界框信息
void print_boxes(const std::vector<BBox>& boxes, const std::string& title) {
    std::cout << "\n" << title << " (" << boxes.size() << " boxes):\n";
    for (const auto& box : boxes) {
        std::cout << "Index: " << box.index 
                  << ", Class: " << box.class_id
                  << ", Score: " << box.score
                  << ", Box: [" << box.x1 << ", " << box.y1 << ", " 
                  << box.x2 << ", " << box.y2 << "]\n";
    }
}

int main() {
    // 模拟 YOLO 输出
    const int NUM_BOXES = 1000;
    const int NUM_CLASSES = 10;
    const float IOU_THRESHOLD = 0.5f;
    const int MAX_OUTPUT_PER_CLASS = 5;
    
    auto yolo_output = generate_yolo_output(NUM_BOXES, NUM_CLASSES);
    
    // 应用多类别 NMS
    auto start = std::chrono::high_resolution_clock::now();
    auto keep_indices = multiclass_nms(yolo_output, IOU_THRESHOLD, MAX_OUTPUT_PER_CLASS);
    auto end = std::chrono::high_resolution_clock::now();
    
    // 提取保留的边界框
    std::vector<BBox> final_boxes;
    for (int idx : keep_indices) {
        // 在原始输出中查找对应的边界框
        auto it = std::find_if(yolo_output.begin(), yolo_output.end(), 
                             [idx](const BBox& b) { return b.index == idx; });
        if (it != yolo_output.end()) {
            final_boxes.push_back(*it);
        }
    }
    
    // 计算处理时间
    auto duration = std::chrono::duration_cast<std::chrono::microseconds>(end - start);
    
    // 输出结果
    std::cout << "YOLO NMS 处理结果:\n";
    std::cout << "原始边界框数量: " << NUM_BOXES << "\n";
    std::cout << "保留边界框数量: " << final_boxes.size() << "\n";
    std::cout << "处理时间: " << duration.count() << " 微秒\n";
    
    // 打印部分结果（避免输出过多）
    std::cout << "\n前 10 个保留的边界框:\n";
    for (int i = 0; i < std::min(10, static_cast<int>(final_boxes.size())); ++i) {
        const auto& box = final_boxes[i];
        std::cout << "Index: " << box.index 
                  << ", Class: " << box.class_id
                  << ", Score: " << box.score
                  << ", Box: [" << box.x1 << ", " << box.y1 << ", " 
                  << box.x2 << ", " << box.y2 << "]\n";
    }
    
    return 0;
}

代码说明

YOLO 模型输出模拟
generate_yolo_output函数模拟 YOLO 模型的输出：
生成随机边界框（左上角坐标 x1、y1 在 [0,1) 范围内，宽和高不超过 0.2，因此右下角坐标 x2、y2 可能略大于 1）
为每个边界框分配随机置信度分数和类别 ID
保存原始索引以便后续恢复
NMS 处理流程
在 main函数中：
生成模拟的 YOLO 输出（1000 个边界框，10 个类别）
调用 multiclass_nms应用多类别 NMS
测量处理时间
提取保留的边界框
输出处理结果和部分保留的边界框
关键参数
IOU_THRESHOLD = 0.5f：IoU 阈值，超过此值的重叠边界框将被抑制
MAX_OUTPUT_PER_CLASS = 5：每个类别最多保留的边界框数量
NUM_BOXES = 1000：模拟生成的边界框数量
NUM_CLASSES = 10：模拟的类别数量
如何集成到真实 YOLO 实现
在实际 YOLO 实现中，集成 NMS 的步骤如下：
获取模型输出：
// 假设 raw_boxes 是 YOLO 模型的原始输出
std::vector<BBox> raw_boxes = get_yolo_output(image);
应用置信度阈值过滤：
const float CONF_THRESHOLD = 0.5f;
std::vector<BBox> filtered_boxes;
for (const auto& box : raw_boxes) {
    if (box.score >= CONF_THRESHOLD) {
        filtered_boxes.push_back(box);
    }
}
应用多类别 NMS：
const float IOU_THRESHOLD = 0.5f;
const int MAX_OUTPUT_PER_CLASS = 100; // 根据需求调整
auto keep_indices = multiclass_nms(filtered_boxes, IOU_THRESHOLD, MAX_OUTPUT_PER_CLASS);
提取最终结果：
std::vector<BBox> final_detections;
for (int idx : keep_indices) {
    auto it = std::find_if(filtered_boxes.begin(), filtered_boxes.end(),
                           [idx](const BBox& b) { return b.index == idx; });
    if (it != filtered_boxes.end()) {
        final_detections.push_back(*it);
    }
}
处理最终检测结果：
for (const auto& detection : final_detections) {
// 绘制边界框、输出类别信息等
draw_box(image, detection);
print_class(detection.class_id, detection.score);
}