An Overview of Object Detection Algorithms

Object detection algorithms are a class of computer vision techniques for recognizing and localizing target objects in images or video. Commonly used object detection algorithms include:

1. R-CNN (Region-based Convolutional Neural Networks):

  • R-CNN: generates candidate regions with selective search and then classifies each candidate region with a convolutional neural network (CNN); a selective-search sketch follows the example code below.
  • Fast R-CNN: improves on R-CNN by sharing convolutional features across regions, which speeds up processing.
  • Faster R-CNN: further improves Fast R-CNN by introducing a Region Proposal Network (RPN) to generate the candidate regions.

Example code (note: for brevity, this demo classifies the whole frame with a CaffeNet model through OpenCV's DNN module rather than running a full region-based detection pipeline):

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <fstream>

using namespace cv;
using namespace dnn;
using namespace std;

void detectObjects(Mat& frame, Net& net, const vector<string>& classNames) {
    Mat blob;
    // CaffeNet's deploy.prototxt expects 227x227 BGR inputs with the ImageNet
    // channel means (104, 117, 123) subtracted.
    blobFromImage(frame, blob, 1.0, Size(227, 227), Scalar(104, 117, 123), false, false);
    net.setInput(blob);
    // A classification network returns one score per class for the whole image.
    Mat prob = net.forward();

    // Pick the class with the highest score.
    double confidence;
    Point classIdPoint;
    minMaxLoc(prob.reshape(1, 1), 0, &confidence, 0, &classIdPoint);
    int classId = classIdPoint.x;

    if (confidence > 0.5 && classId < (int)classNames.size()) {
        putText(frame, classNames[classId], Point(10, 30), FONT_HERSHEY_SIMPLEX, 1, Scalar(0, 255, 0), 2);
        cout << "Detected: " << classNames[classId] << " with confidence: " << confidence << endl;
    }
}

int main() {
    string modelConfiguration = "deploy.prototxt";
    string modelWeights = "bvlc_reference_caffenet.caffemodel";
    string classesFile = "synset_words.txt";

    Net net = readNetFromCaffe(modelConfiguration, modelWeights);

    vector<string> classNames;
    ifstream ifs(classesFile.c_str());
    string line;
    while (getline(ifs, line)) classNames.push_back(line);

    VideoCapture cap(0);
    if (!cap.isOpened()) {
        cout << "Error opening video stream" << endl;
        return -1;
    }

    while (true) {
        Mat frame;
        cap >> frame;
        if (frame.empty()) break;

        detectObjects(frame, net, classNames);

        imshow("R-CNN Object Detection", frame);
        if (waitKey(1) == 27) break; // Press 'ESC' to exit
    }

    cap.release();
    destroyAllWindows();
    return 0;
}
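
To make the proposal-generation step that the R-CNN family starts from concrete, below is a minimal sketch using the selective search implementation from OpenCV's ximgproc contrib module (opencv_contrib is required; the image path and output file name are placeholders):

#include <opencv2/opencv.hpp>
#include <opencv2/ximgproc/segmentation.hpp>
#include <iostream>
#include <vector>

using namespace cv;
using namespace std;

int main() {
    Mat image = imread("input.jpg");   // placeholder path
    if (image.empty()) {
        cout << "Could not read the image" << endl;
        return -1;
    }

    // Selective search merges an initial over-segmentation into progressively
    // larger regions; each merged region becomes a candidate object box.
    auto ss = ximgproc::segmentation::createSelectiveSearchSegmentation();
    ss->setBaseImage(image);
    ss->switchToSelectiveSearchFast();   // switchToSelectiveSearchQuality() is slower but more thorough

    vector<Rect> proposals;
    ss->process(proposals);
    cout << "Generated " << proposals.size() << " region proposals" << endl;

    // Draw the first 100 proposals; R-CNN would crop and classify each of them.
    for (size_t i = 0; i < proposals.size() && i < 100; ++i)
        rectangle(image, proposals[i], Scalar(0, 255, 0), 1);

    imwrite("proposals.jpg", image);
    return 0;
}

An actual R-CNN would then crop each proposal, warp it to the network's input size, and classify it with a CNN such as the CaffeNet model loaded above.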

2. YOLO (You Only Look Once):

  • YOLO treats object detection as a regression problem and predicts bounding boxes and class probabilities directly from the image in a single pass. Its main advantage is speed, which makes it well suited to real-time applications. (A non-maximum suppression sketch follows the example code below.)

Example code:
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <fstream>

using namespace cv;
using namespace dnn;
using namespace std;

void detectObjects(Mat& frame, Net& net, const vector<string>& classNames) {
    Mat blob;
    // YOLOv3 expects RGB input scaled to [0, 1]; 416x416 is the standard input size.
    blobFromImage(frame, blob, 1/255.0, Size(416, 416), Scalar(), true, false);
    net.setInput(blob);
    vector<Mat> outs;
    // YOLO has several output layers (one per detection scale), so collect all of them.
    net.forward(outs, net.getUnconnectedOutLayersNames());

    // Each output row is [center_x, center_y, width, height, objectness, class scores...],
    // with the box coordinates normalized to [0, 1].
    float confThreshold = 0.5;
    for (size_t i = 0; i < outs.size(); ++i) {
        float* data = (float*)outs[i].data;
        for (int j = 0; j < outs[i].rows; ++j, data += outs[i].cols) {
            Mat scores = outs[i].row(j).colRange(5, outs[i].cols);
            Point classIdPoint;
            double confidence;
            minMaxLoc(scores, 0, &confidence, 0, &classIdPoint);
            // Every box above the threshold is drawn; see the NMS sketch after this
            // example for suppressing duplicate, overlapping boxes.
            if (confidence > confThreshold) {
                int centerX = (int)(data[0] * frame.cols);
                int centerY = (int)(data[1] * frame.rows);
                int width = (int)(data[2] * frame.cols);
                int height = (int)(data[3] * frame.rows);
                int left = centerX - width / 2;
                int top = centerY - height / 2;

                rectangle(frame, Point(left, top), Point(left + width, top + height), Scalar(0, 255, 0), 3);
                putText(frame, classNames[classIdPoint.x], Point(left, top - 10), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0), 2);
            }
        }
    }
}

int main() {
    string modelConfiguration = "yolov3.cfg";
    string modelWeights = "yolov3.weights";
    string classesFile = "coco.names";

    Net net = readNetFromDarknet(modelConfiguration, modelWeights);
    net.setPreferableBackend(DNN_BACKEND_OPENCV);
    net.setPreferableTarget(DNN_TARGET_CPU);

    vector<string> classNames;
    ifstream ifs(classesFile.c_str());
    string line;
    while (getline(ifs, line)) classNames.push_back(line);

    VideoCapture cap(0);
    if (!cap.isOpened()) {
        cout << "Error opening video stream" << endl;
        return -1;
    }

    while (true) {
        Mat frame;
        cap >> frame;
        if (frame.empty()) break;

        detectObjects(frame, net, classNames);

        imshow("YOLO Object Detection", frame);
        if (waitKey(1) == 27) break; // Press 'ESC' to exit
    }

    cap.release();
    destroyAllWindows();
    return 0;
}
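
The loop above draws every box whose class score passes the threshold, so the same object usually ends up with several overlapping boxes. Below is a minimal sketch of filtering them with OpenCV's cv::dnn::NMSBoxes; it assumes the detection loop collects boxes, confidences and classIds (hypothetical vectors) instead of drawing immediately, and uses the same includes and using-directives as the YOLO example:

// Draw only the boxes that survive non-maximum suppression.
void drawAfterNMS(Mat& frame, const vector<Rect>& boxes,
                  const vector<float>& confidences, const vector<int>& classIds,
                  const vector<string>& classNames,
                  float confThreshold = 0.5f, float nmsThreshold = 0.4f) {
    vector<int> keep;
    // NMSBoxes discards any box whose IoU with a higher-scoring box exceeds nmsThreshold.
    NMSBoxes(boxes, confidences, confThreshold, nmsThreshold, keep);
    for (int idx : keep) {
        const Rect& box = boxes[idx];
        rectangle(frame, box, Scalar(0, 255, 0), 3);
        putText(frame, classNames[classIds[idx]], Point(box.x, box.y - 10),
                FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0), 2);
    }
}

Inside the detection loop, push_back the Rect(left, top, width, height), the score and the class index, then call this helper once per frame.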

3. SSD (Single Shot MultiBox Detector):

  • SSD detects on feature maps of several different scales and predicts multiple classes and bounding boxes in a single pass, striking a good balance between speed and accuracy. (A higher-level alternative using cv::dnn::DetectionModel is sketched after the example code below.)

Example code:
#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <fstream>

using namespace cv;
using namespace dnn;
using namespace std;

void detectObjects(Mat& frame, Net& net, const vector<string>& classNames) {
    Mat blob;
    // Preprocessing must match the model: the VGG-based SSD300 Caffe model is
    // typically fed 300x300 BGR inputs with the channel means (104, 117, 123)
    // subtracted and no scaling.
    blobFromImage(frame, blob, 1.0, Size(300, 300), Scalar(104, 117, 123), false, false);
    net.setInput(blob);
    Mat detections = net.forward();

    // The output blob has shape 1x1xNx7; view it as an Nx7 matrix where each row is
    // [batchId, classId, confidence, left, top, right, bottom] with coordinates
    // normalized to [0, 1].
    Mat detectionMat(detections.size[2], detections.size[3], CV_32F, detections.ptr<float>());

    float confidenceThreshold = 0.5;
    for (int i = 0; i < detectionMat.rows; ++i) {
        float confidence = detectionMat.at<float>(i, 2);
        if (confidence > confidenceThreshold) {
            int classId = static_cast<int>(detectionMat.at<float>(i, 1));
            int left = static_cast<int>(detectionMat.at<float>(i, 3) * frame.cols);
            int top = static_cast<int>(detectionMat.at<float>(i, 4) * frame.rows);
            int right = static_cast<int>(detectionMat.at<float>(i, 5) * frame.cols);
            int bottom = static_cast<int>(detectionMat.at<float>(i, 6) * frame.rows);

            rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 255, 0), 2);
            if (classId >= 0 && classId < (int)classNames.size())
                putText(frame, classNames[classId], Point(left, top - 10), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0), 2);
        }
    }
}

int main() {
    string modelConfiguration = "deploy.prototxt";
    string modelWeights = "VGG_VOC0712_SSD_300x300_iter_120000.caffemodel";
    // The class list must match the training dataset: this caffemodel was trained on
    // PASCAL VOC (background + 20 classes), so use a VOC class file here (the file
    // name below is a placeholder).
    string classesFile = "voc.names";

    Net net = readNetFromCaffe(modelConfiguration, modelWeights);

    vector<string> classNames;
    ifstream ifs(classesFile.c_str());
    string line;
    while (getline(ifs, line)) classNames.push_back(line);

    VideoCapture cap(0);
    if (!cap.isOpened()) {
        cout << "Error opening video stream" << endl;
        return -1;
    }

    while (true) {
        Mat frame;
        cap >> frame;
        if (frame.empty()) break;

        detectObjects(frame, net, classNames);

        imshow("SSD Object Detection", frame);
        if (waitKey(1) == 27) break; // Press 'ESC' to exit
    }

    cap.release();
    destroyAllWindows();
    return 0;
}
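
As an aside, OpenCV 4.1.2 and later also ship a higher-level cv::dnn::DetectionModel wrapper that handles the blob creation, forwarding, output decoding and NMS internally. A minimal sketch under that assumption, reusing the same placeholder model files and running on a single image:

#include <opencv2/opencv.hpp>
#include <opencv2/dnn.hpp>
#include <iostream>
#include <vector>

using namespace cv;
using namespace dnn;
using namespace std;

int main() {
    // DetectionModel(weights, config) wraps readNet() plus the pre/post-processing.
    DetectionModel model("VGG_VOC0712_SSD_300x300_iter_120000.caffemodel", "deploy.prototxt");
    model.setInputParams(1.0, Size(300, 300), Scalar(104, 117, 123), false);

    Mat frame = imread("input.jpg");   // placeholder image path
    if (frame.empty()) {
        cout << "Could not read the image" << endl;
        return -1;
    }

    vector<int> classIds;
    vector<float> confidences;
    vector<Rect> boxes;
    // detect() returns final class ids, scores and pixel-space boxes after NMS.
    model.detect(frame, classIds, confidences, boxes, 0.5f, 0.4f);

    for (size_t i = 0; i < boxes.size(); ++i)
        rectangle(frame, boxes[i], Scalar(0, 255, 0), 2);

    imwrite("output.jpg", frame);
    return 0;
}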

4. RetinaNet:

  • RetinaNet introduces Focal Loss to handle the extreme class imbalance between foreground and background, and is particularly well suited to detecting small objects. (A focal-loss sketch follows the example code below.)

Example code (this example loads a TorchScript model with LibTorch instead of using OpenCV's DNN module):
#include <torch/script.h> // One-stop header for loading TorchScript models.
#include <opencv2/opencv.hpp>
#include <iostream>
#include <fstream>
#include <memory>

using namespace cv;
using namespace std;

void detectObjects(Mat& frame, torch::jit::script::Module& module, const vector<string>& classNames) {
    // Preprocess the image: resize to the model's input size and scale to [0, 1].
    // Note: OpenCV frames are BGR; if the exported model was trained on RGB input
    // (as torchvision models are), convert with cvtColor(frame, img, COLOR_BGR2RGB) first.
    Mat img;
    cv::resize(frame, img, Size(800, 800));
    img.convertTo(img, CV_32F, 1.0 / 255);
    // Wrap the image memory as a 1xHxWx3 tensor, then reorder to NCHW.
    auto input_tensor = torch::from_blob(img.data, {1, img.rows, img.cols, 3});
    input_tensor = input_tensor.permute({0, 3, 1, 2}).contiguous();
    input_tensor = input_tensor.to(torch::kCUDA);   // assumes a CUDA build; use torch::kCPU otherwise

    // Forward pass
    auto output = module.forward({input_tensor}).toTuple();

    // Postprocess the output. This assumes the exported module returns a tuple of
    // (boxes, scores, labels); adjust the indices if your export uses another order.
    auto detections = output->elements()[0].toTensor().to(torch::kCPU);
    auto scores = output->elements()[1].toTensor().to(torch::kCPU);
    auto labels = output->elements()[2].toTensor().to(torch::kCPU);

    // Note: this assumes the exported model returns box coordinates normalized to
    // [0, 1]; if it returns coordinates in the 800x800 input space, scale by
    // frame.cols / 800.0 and frame.rows / 800.0 instead.
    float confidenceThreshold = 0.5;
    for (int i = 0; i < detections.size(0); ++i) {
        if (scores[i].item<float>() > confidenceThreshold) {
            int classId = labels[i].item<int>();
            auto box = detections[i];
            int left = static_cast<int>(box[0].item<float>() * frame.cols);
            int top = static_cast<int>(box[1].item<float>() * frame.rows);
            int right = static_cast<int>(box[2].item<float>() * frame.cols);
            int bottom = static_cast<int>(box[3].item<float>() * frame.rows);

            rectangle(frame, Point(left, top), Point(right, bottom), Scalar(0, 255, 0), 2);
            putText(frame, classNames[classId], Point(left, top - 10), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0), 2);
        }
    }
}

int main() {
    // Load the model
    string modelPath = "retinanet_model.pt";
    torch::jit::script::Module module;
    try {
        module = torch::jit::load(modelPath);
        module.to(torch::kCUDA);
    }
    catch (const c10::Error& e) {
        cerr << "Error loading the model\n";
        return -1;
    }

    // Load class names
    string classesFile = "coco.names";
    vector<string> classNames;
    ifstream ifs(classesFile.c_str());
    string line;
    while (getline(ifs, line)) classNames.push_back(line);

    // Open video capture
    VideoCapture cap(0);
    if (!cap.isOpened()) {
        cout << "Error opening video stream" << endl;
        return -1;
    }

    while (true) {
        Mat frame;
        cap >> frame;
        if (frame.empty()) break;

        detectObjects(frame, module, classNames);

        imshow("RetinaNet Object Detection", frame);
        if (waitKey(1) == 27) break; // Press 'ESC' to exit
    }

    cap.release();
    destroyAllWindows();
    return 0;
}
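
To make the Focal Loss idea concrete, here is a minimal LibTorch sketch of the binary focal loss from the RetinaNet paper, FL(p_t) = -alpha_t * (1 - p_t)^gamma * log(p_t); the function name is illustrative, while alpha = 0.25 and gamma = 2 are the paper's defaults:

#include <torch/torch.h>

// A minimal sketch of the binary focal loss used by RetinaNet.
// `logits` and `targets` have the same shape; targets hold 0/1 labels.
// Easy examples (p_t close to 1) are down-weighted, which is how RetinaNet
// copes with the extreme foreground/background imbalance of dense anchors.
torch::Tensor focal_loss(const torch::Tensor& logits, const torch::Tensor& targets,
                         double alpha = 0.25, double gamma = 2.0) {
    auto p = torch::sigmoid(logits);
    // Numerically stable per-element binary cross-entropy,
    // -[y * log(p) + (1 - y) * log(1 - p)], written with log_sigmoid.
    auto ce = -(targets * torch::log_sigmoid(logits)
                + (1 - targets) * torch::log_sigmoid(-logits));
    auto p_t = p * targets + (1 - p) * (1 - targets);              // probability of the true class
    auto alpha_t = alpha * targets + (1 - alpha) * (1 - targets);  // class-balancing weight
    return (alpha_t * torch::pow(1 - p_t, gamma) * ce).mean();     // modulating factor (1 - p_t)^gamma
}

Setting gamma = 0 recovers ordinary (alpha-weighted) cross-entropy; larger gamma pushes the loss to focus on hard, misclassified examples.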

5. Mask R-CNN:

  • Mask R-CNN extends Faster R-CNN with an additional mask branch: besides detecting objects, it also produces per-instance segmentation masks. (A mask-blending sketch follows the example code below.)
    These algorithms are typically implemented with deep learning frameworks such as TensorFlow or PyTorch and require large amounts of annotated data for training. Which detector to choose depends on the needs of the application, such as real-time requirements, accuracy, and available compute.

Example code (a TorchScript Mask R-CNN loaded with LibTorch; the assumed output is a tuple of boxes, labels, scores and masks):
#include <torch/script.h> // One-stop header for loading TorchScript models.
#include <opencv2/opencv.hpp>
#include <iostream>
#include <fstream>
#include <memory>

using namespace cv;
using namespace std;

void detectObjects(Mat& frame, torch::jit::script::Module& module, const vector<string>& classNames) {
    // Preprocess the image
    Mat img;
    cv::resize(frame, img, Size(800, 800));
    img.convertTo(img, CV_32F, 1.0 / 255);
    // Wrap the image memory as a 1xHxWx3 tensor, then reorder to NCHW.
    auto input_tensor = torch::from_blob(img.data, {1, img.rows, img.cols, 3});
    input_tensor = input_tensor.permute({0, 3, 1, 2}).contiguous();
    input_tensor = input_tensor.to(torch::kCUDA);   // assumes a CUDA build; use torch::kCPU otherwise

    // Forward pass
    auto output = module.forward({input_tensor}).toTuple();

    // Postprocess the output. This assumes the exported module returns a tuple of
    // (boxes, labels, scores, masks); adjust the indices if your export differs.
    auto boxes = output->elements()[0].toTensor().to(torch::kCPU);
    auto labels = output->elements()[1].toTensor().to(torch::kCPU);
    auto scores = output->elements()[2].toTensor().to(torch::kCPU);
    auto masks = output->elements()[3].toTensor().to(torch::kCPU);

    // Note: this assumes the exported model returns box coordinates normalized to
    // [0, 1]; if it returns coordinates in the 800x800 input space, scale by
    // frame.cols / 800.0 and frame.rows / 800.0 instead.
    float confidenceThreshold = 0.5;
    for (int i = 0; i < boxes.size(0); ++i) {
        if (scores[i].item<float>() > confidenceThreshold) {
            int classId = labels[i].item<int>();
            auto box = boxes[i];
            int left = static_cast<int>(box[0].item<float>() * frame.cols);
            int top = static_cast<int>(box[1].item<float>() * frame.rows);
            int right = static_cast<int>(box[2].item<float>() * frame.cols);
            int bottom = static_cast<int>(box[3].item<float>() * frame.rows);

            // Clamp the box to the frame so the ROI used for the mask stays valid.
            Rect boxRect = Rect(left, top, right - left, bottom - top)
                           & Rect(0, 0, frame.cols, frame.rows);
            if (boxRect.width <= 0 || boxRect.height <= 0) continue;

            rectangle(frame, boxRect, Scalar(0, 255, 0), 2);
            putText(frame, classNames[classId], Point(boxRect.x, boxRect.y - 10), FONT_HERSHEY_SIMPLEX, 0.5, Scalar(0, 255, 0), 2);

            // Extract and resize the mask. The indexing masks[i][classId] assumes the
            // exported model returns one mask per class; torchvision's Mask R-CNN
            // outputs masks of shape [N, 1, H, W], in which case masks[i][0] is the
            // correct slice.
            auto mask = masks[i][classId].contiguous();
            Mat maskMat(Size(mask.size(1), mask.size(0)), CV_32F, mask.data_ptr<float>());
            resize(maskMat, maskMat, boxRect.size());
            Mat coloredRoi = frame(boxRect);
            // Paint the pixels whose mask probability exceeds 0.5 red.
            coloredRoi.setTo(Scalar(0, 0, 255), maskMat > 0.5);
        }
    }
}

int main() {
    // Load the model
    string modelPath = "mask_rcnn_model.pt";
    torch::jit::script::Module module;
    try {
        module = torch::jit::load(modelPath);
        module.to(torch::kCUDA);
    }
    catch (const c10::Error& e) {
        cerr << "Error loading the model\n";
        return -1;
    }

    // Load class names
    string classesFile = "coco.names";
    vector<string> classNames;
    ifstream ifs(classesFile.c_str());
    string line;
    while (getline(ifs, line)) classNames.push_back(line);

    // Open video capture
    VideoCapture cap(0);
    if (!cap.isOpened()) {
        cout << "Error opening video stream" << endl;
        return -1;
    }

    while (true) {
        Mat frame;
        cap >> frame;
        if (frame.empty()) break;

        detectObjects(frame, module, classNames);

        imshow("Mask R-CNN Object Detection", frame);
        if (waitKey(1) == 27) break; // Press 'ESC' to exit
    }

    cap.release();
    destroyAllWindows();
    return 0;
}
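
Painting the masked pixels solid red (the setTo call above) hides the underlying image; a softer option is to alpha-blend a colored overlay inside the box. A minimal sketch, reusing the coloredRoi and maskMat variables from the example (the helper name and the 0.4 blend weight are illustrative):

// Blend a red overlay into the masked region instead of overwriting it
// (same includes and using-directives as the Mask R-CNN example).
void blendMask(Mat& coloredRoi, const Mat& maskMat, double alpha = 0.4) {
    Mat overlay(coloredRoi.size(), coloredRoi.type(), Scalar(0, 0, 255));
    Mat blended;
    // Weighted sum: (1 - alpha) * original + alpha * red overlay.
    addWeighted(coloredRoi, 1.0 - alpha, overlay, alpha, 0.0, blended);
    blended.copyTo(coloredRoi, maskMat > 0.5);   // copy only where the mask fires
}

Calling blendMask(coloredRoi, maskMat) in place of coloredRoi.setTo(...) keeps the object visible through the tinted mask.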
