基于DBNet实现mnn框架的条形码检测

基本思想:项目需求,需要对条形码区域进行识别。

一、文件结构

ubuntu@ubuntu:~/find_barcode$ tree -L 2
.
├── best.mnn
├── cmake-build-debug
│   ├── CMakeCache.txt
│   ├── CMakeFiles
│   ├── cmake_install.cmake
│   ├── find_barcode
│   ├── find_barcode.cbp
│   └── Makefile
├── CMakeLists.txt
├── DBNet.onnx
├── include
│   └── MNN
├── lib
│   └── libMNN.so
├── main.cpp
└── testimgs
    ├── 0.jpg
    ├── 1.jpg
    ├── 2.jpg
    ├── 3.jpg
    └── 4.jpg

6 directories, 15 files

二、代码

1.1 CMakeLists.txt
# Build script for the find_barcode demo: a single executable built from
# main.cpp and linked against OpenCV and the prebuilt MNN shared library
# stored under lib/.
cmake_minimum_required(VERSION 3.16)
project(find_barcode)

set(CMAKE_CXX_STANDARD 14)

# Pass -fopenmp to both the C and the C++ compiler.
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} -fopenmp")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -fopenmp")

# Project header search paths (the MNN headers live under include/MNN).
include_directories(
        ${CMAKE_SOURCE_DIR}
        ${CMAKE_SOURCE_DIR}/include
        ${CMAKE_SOURCE_DIR}/include/MNN)

# Locate OpenCV and add its headers to the search path.
find_package(OpenCV REQUIRED)
#message(STATUS ${OpenCV_INCLUDE_DIRS})
include_directories(${OpenCV_INCLUDE_DIRS})

# Wrap the prebuilt MNN shared library in an imported target.
add_library(libmnn SHARED IMPORTED)
set_target_properties(libmnn PROPERTIES IMPORTED_LOCATION ${CMAKE_SOURCE_DIR}/lib/libMNN.so)

add_executable(find_barcode main.cpp)

# Link the OpenCV libraries and MNN into the executable.
target_link_libraries(find_barcode ${OpenCV_LIBS} libmnn)
1.2 main.cpp

这里将DBNet类写到一起,后续可以拆分出来。

#define _CRT_SECURE_NO_WARNINGS

#include <chrono>
#include <climits>
#include <cmath>
#include <cstdio>
#include <cstring>
#include <iostream>
#include <memory>
#include <string>
#include <vector>

#include <opencv2/imgproc.hpp>
#include <opencv2/highgui.hpp>
#include <MNN/Interpreter.hpp>
#include <MNN/ImageProcess.hpp>


using namespace cv;
using namespace std;

/**
 * Region detector built on a DBNet segmentation model executed through MNN.
 *
 * detect() runs the model on an image and draws the detected quadrilaterals
 * directly onto that image; the two private helpers implement the
 * post-processing steps it relies on.
 */
class DBNet {
public:
    /**
     * @param binaryThreshold   threshold applied to the model output map to obtain the binary mask
     * @param polygonThreshold  minimum contourScore() a contour must reach to be kept
     * @param unclipRatio       factor used by unclip() when computing the edge offset distance
     * @param maxCandidates     upper bound on the number of contours examined per image
     */
    DBNet(const float binaryThreshold = 0.5, const float polygonThreshold = 0.7, const float unclipRatio = 1.5,
          const int maxCandidates = 1000);

    /// Runs detection on srcimg and draws the resulting boxes onto srcimg.
    void detect(Mat &srcimg);

private:
    /// Mean of `binary` over the area enclosed by `contour`.
    float contourScore(const Mat &binary, const vector<Point> &contour);

    /// Appends to outPoly a polygon derived from inPoly by offsetting its edges.
    void unclip(const vector<Point2f> &inPoly, vector<Point2f> &outPoly);

    // Post-processing parameters, set by the constructor.
    float binaryThreshold;
    float polygonThreshold;
    float unclipRatio;
    int maxCandidates;

    // Size the source image is resized to before it is handed to the network.
    const int inpWidth = 736;
    const int inpHeight = 736;

    // Per-channel normalisation constants. NOTE(review): the member functions
    // in this file do not read these two arrays; detect() builds its own
    // 255-scaled copies.
    const float meanValues[3] = {0.485, 0.456, 0.406};
    const float normValues[3] = {0.229, 0.224, 0.225};
};

/**
 * Stores the post-processing parameters and prints a start-up banner.
 *
 * No model is loaded here; the constructor only records its arguments.
 */
DBNet::DBNet(const float binaryThreshold, const float polygonThreshold, const float unclipRatio,
             const int maxCandidates)
        : binaryThreshold(binaryThreshold),
          polygonThreshold(polygonThreshold),
          unclipRatio(unclipRatio),
          maxCandidates(maxCandidates) {
    cout << "run dbnet" << endl;
}

/**
 * Runs the DBNet model on srcimg and draws every accepted region onto it.
 *
 * Steps:
 *   1. resize srcimg to inpWidth x inpHeight and feed it to the MNN model
 *      stored at "../best.mnn" (BGR -> RGB, mean/scale normalisation);
 *   2. copy the first inpHeight*inpWidth floats of the tensor named "output"
 *      into a probability map and threshold it with binaryThreshold;
 *   3. for each contour of the thresholded map (at most maxCandidates when
 *      that value is positive) whose contourScore() reaches polygonThreshold:
 *      rescale it to source-image coordinates, take its minimum-area
 *      rectangle, normalise the rectangle orientation and pass it to unclip();
 *   4. draw each resulting quadrilateral (red corner dots, green edges).
 *
 * On any failure (empty image, model cannot be loaded, missing tensors,
 * inference error, output smaller than the expected map) a message is written
 * to stderr and srcimg is left untouched.
 *
 * NOTE(review): the model file is loaded and a session is created on every
 * call; hoisting that into the object would need a change to the class.
 *
 * @param srcimg image to analyse; modified in place with the drawn boxes.
 */
void DBNet::detect(Mat &srcimg) {
    if (srcimg.empty()) {
        cerr << "DBNet::detect: input image is empty" << endl;
        return;
    }
    const int srcHeight = srcimg.rows;
    const int srcWidth = srcimg.cols;

    Mat resized;
    resize(srcimg, resized, Size(this->inpWidth, this->inpHeight));

    // ---- Model set-up -----------------------------------------------------
    std::shared_ptr<MNN::Interpreter> mnnNet(MNN::Interpreter::createFromFile("../best.mnn"));
    if (!mnnNet) {
        cerr << "DBNet::detect: failed to load model ../best.mnn" << endl;
        return;
    }

    MNN::ScheduleConfig netConfig;
    netConfig.type = MNN_FORWARD_CPU;
    netConfig.numThread = 4;

    auto *session = mnnNet->createSession(netConfig);
    if (session == nullptr) {
        cerr << "DBNet::detect: failed to create MNN session" << endl;
        return;
    }
    auto *input = mnnNet->getSessionInput(session, nullptr);
    if (input == nullptr) {
        cerr << "DBNet::detect: model has no input tensor" << endl;
        return;
    }

    // Dimensions are given as N, C, H, W (height before width).
    mnnNet->resizeTensor(input, {1, 3, (int) inpHeight, (int) inpWidth});
    mnnNet->resizeSession(session);

    // ---- Pre-processing ---------------------------------------------------
    // Mean / scale expressed for 0..255 pixel values.
    const float mean_vals[3] = {255 * 0.485, 255 * 0.456, 255 * 0.406};
    const float norm_255[3] = {1 / (255 * 0.229), 1 / (255 * 0.224), 1 / (255 * 0.225)};

    std::shared_ptr<MNN::CV::ImageProcess> pretreat(
            MNN::CV::ImageProcess::create(MNN::CV::BGR, MNN::CV::RGB, mean_vals, 3,
                                          norm_255, 3));
    if (!pretreat) {
        cerr << "DBNet::detect: failed to create MNN image pre-processor" << endl;
        return;
    }
    pretreat->convert(resized.data, (int) inpWidth, (int) inpHeight, static_cast<int>(resized.step[0]), input);

    // ---- Inference --------------------------------------------------------
    if (mnnNet->runSession(session) != MNN::NO_ERROR) {
        cerr << "DBNet::detect: inference failed" << endl;
        return;
    }

    auto *output = mnnNet->getSessionOutput(session, "output");
    if (output == nullptr) {
        cerr << "DBNet::detect: model has no tensor named \"output\"" << endl;
        return;
    }
    MNN::Tensor outputHost(output, output->getDimensionType());
    output->copyToHostTensor(&outputHost);

    const int shape_h = outputHost.height();
    const int shape_c = outputHost.channel();
    const int shape_w = outputHost.width();
    const int shape_s = static_cast<int>(outputHost.size());  // size in bytes
    printf("---c= %d  w= %d h= %d s= %d ----\n", shape_c, shape_w, shape_h, shape_s);

    // Copy exactly one inpHeight x inpWidth float plane. The copy length is
    // derived from the destination so a larger tensor cannot overflow it, and
    // a smaller tensor is rejected instead of leaving the map half-initialised.
    const float *floatArray = outputHost.host<float>();
    Mat binary(resized.rows, resized.cols, CV_32FC1);
    const size_t planeBytes = binary.total() * binary.elemSize();
    if (floatArray == nullptr || shape_s < 0 || static_cast<size_t>(shape_s) < planeBytes) {
        cerr << "DBNet::detect: output tensor is smaller than the expected "
             << resized.cols << "x" << resized.rows << " map" << endl;
        return;
    }
    memcpy(binary.data, floatArray, planeBytes);

    // ---- Post-processing --------------------------------------------------
    Mat bitmap;
    threshold(binary, bitmap, binaryThreshold, 255, THRESH_BINARY);

    // Factors that map network-resolution coordinates back to the source image.
    const float scaleHeight = (float) (srcHeight) / (float) (binary.size[0]);
    const float scaleWidth = (float) (srcWidth) / (float) (binary.size[1]);

    vector<vector<Point> > contours;
    bitmap.convertTo(bitmap, CV_8UC1);
    findContours(bitmap, contours, RETR_LIST, CHAIN_APPROX_SIMPLE);

    // Candidate number limitation (a non-positive maxCandidates means no cap).
    size_t numCandidate = contours.size();
    if (maxCandidates > 0)
        numCandidate = min(numCandidate, static_cast<size_t>(maxCandidates));

    vector<vector<Point2f> > results;
    for (size_t i = 0; i < numCandidate; i++) {
        const vector<Point> &contour = contours[i];

        // Drop contours whose mean probability is too low.
        if (contourScore(binary, contour) < polygonThreshold)
            continue;

        // Rescale to source-image coordinates.
        vector<Point> contourScaled;
        contourScaled.reserve(contour.size());
        for (const Point &p : contour) {
            contourScaled.push_back(Point(int(p.x * scaleWidth),
                                          int(p.y * scaleHeight)));
        }

        RotatedRect box = minAreaRect(contourScaled);

        // minAreaRect() output is not normalised: it may return rectangles
        // with angle=-90 or height > width. Force a horizontal-wide box.
        const float angle_threshold = 60;  // vertical regions are not expected
        bool swap_size = false;
        if (box.size.width < box.size.height)
            swap_size = true;
        else if (fabs(box.angle) >= angle_threshold)
            swap_size = true;
        if (swap_size) {
            swap(box.size.width, box.size.height);
            if (box.angle < 0)
                box.angle += 90;
            else if (box.angle > 0)
                box.angle -= 90;
        }

        Point2f vertex[4];
        box.points(vertex);
        const vector<Point2f> approx(vertex, vertex + 4);

        vector<Point2f> polygon;
        unclip(approx, polygon);
        // The drawing code below indexes four vertices; skip anything shorter.
        if (polygon.size() < 4)
            continue;
        results.push_back(polygon);
    }

    // ---- Drawing ----------------------------------------------------------
    for (const vector<Point2f> &quad : results) {
        for (int j = 0; j < 4; j++) {
            const Point2f &from = quad[j];
            const Point2f &to = quad[(j + 1) % 4];  // last vertex connects back to the first
            const Point cur(static_cast<int>(from.x), static_cast<int>(from.y));
            const Point next(static_cast<int>(to.x), static_cast<int>(to.y));
            circle(srcimg, cur, 2, Scalar(0, 0, 255), -1);
            line(srcimg, cur, next, Scalar(0, 255, 0));
        }
    }
}

float DBNet::contourScore(const Mat &binary, const vector<Point> &contour) {
    Rect rect = boundingRect(contour);
    int xmin = max(rect.x, 0);
    int xmax = min(rect.x + rect.width, binary.cols - 1);
    int ymin = max(rect.y, 0);
    int ymax = min(rect.y + rect.height, binary.rows - 1);

    Mat binROI = binary(Rect(xmin, ymin, xmax - xmin + 1, ymax - ymin + 1));

    Mat mask = Mat::zeros(ymax - ymin + 1, xmax - xmin + 1, CV_8U);
    vector<Point> roiContour;
    for (size_t i = 0; i < contour.size(); i++) {
        Point pt = Point(contour[i].x - xmin, contour[i].y - ymin);
        roiContour.push_back(pt);
    }
    vector<vector<Point>> roiContours = {roiContour};
    fillPoly(mask, roiContours, Scalar(1));
    float score = mean(binROI, mask).val[0];
    return score;
}

void DBNet::unclip(const vector<Point2f> &inPoly, vector<Point2f> &outPoly) {
    float area = contourArea(inPoly);
    float length = arcLength(inPoly, true);
    float distance = area * unclipRatio / length;

    size_t numPoints = inPoly.size();
    vector<vector<Point2f>> newLines;
    for (size_t i = 0; i < numPoints; i++) {
        vector<Point2f> newLine;
        Point pt1 = inPoly[i];
        Point pt2 = inPoly[(i - 1) % numPoints];
        Point vec = pt1 - pt2;
        float unclipDis = (float) (distance / norm(vec));
        Point2f rotateVec = Point2f(vec.y * unclipDis, -vec.x * unclipDis);
        newLine.push_back(Point2f(pt1.x + rotateVec.x, pt1.y + rotateVec.y));
        newLine.push_back(Point2f(pt2.x + rotateVec.x, pt2.y + rotateVec.y));
        newLines.push_back(newLine);
    }

    size_t numLines = newLines.size();
    for (size_t i = 0; i < numLines; i++) {
        Point2f a = newLines[i][0];
        Point2f b = newLines[i][1];
        Point2f c = newLines[(i + 1) % numLines][0];
        Point2f d = newLines[(i + 1) % numLines][1];
        Point2f pt;
        Point2f v1 = b - a;
        Point2f v2 = d - c;
        float cosAngle = (v1.x * v2.x + v1.y * v2.y) / (norm(v1) * norm(v2));

        if (fabs(cosAngle) > 0.7) {
            pt.x = (b.x + c.x) * 0.5;
            pt.y = (b.y + c.y) * 0.5;
        } else {
            float denom = a.x * (float) (d.y - c.y) + b.x * (float) (c.y - d.y) +
                          d.x * (float) (b.y - a.y) + c.x * (float) (a.y - b.y);
            float num = a.x * (float) (d.y - c.y) + c.x * (float) (a.y - d.y) + d.x * (float) (c.y - a.y);
            float s = num / denom;

            pt.x = a.x + s * (b.x - a.x);
            pt.y = a.y + s * (b.y - a.y);
        }
        outPoly.push_back(pt);
    }
}

int main() {
    DBNet mynet(0.3, 0.3, 4.5, 1000);
//    string imgpath = "/home/ubuntu/test02.jpg";
    string imgpath = "../testimgs/0.jpg";
    Mat srcimg = imread(imgpath);
    mynet.detect(srcimg);

    imshow("kWinName", srcimg);
    //cv::imwrite("a.jpg",srcimg);
    waitKey(0);
    destroyAllWindows();
}

三、测试结果

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 1
    评论
PAN和DBNet是两种常用的文字检测模型。它们可以用于检测图像或文档中的文字并提取出来。在文字检测后,还需要进行后处理来提高检测结果的准确性和可用性。 首先,文字检测模型(如PAN和DBNet)会将输入的图像或文档送入网络进行处理。这些模型通常包含多层卷积和池化层,以提取图像或文档中的文字特征。最终,模型会输出一个以文字边界框表示的检测结果。 然后,在得到检测结果后,需要对其进行后处理。后处理的过程主要包括以下几个步骤: 1. 去除重叠框:由于检测模型的输出可能会有一些重叠的文字边界框,需要对这些重叠的框进行处理。常用的方法是根据一定的阈值来判断两个框是否重叠,然后选择其中一个框作为最终结果。 2. 合并连接框:一些文字可能会被检测成多个连接起来的框,需要将这些框合并成一个整体。常用的方法是利用文字的拓扑结构进行合并,即根据文字边界框之间的相对位置关系来进行判断和调整。 3. 框的调整和修正:有时候检测模型可能会将文字的边界框检测得不够准确,需要对框进行一些调整和修正。比如,可以根据文字的几何信息来调整框的位置和大小,或者根据文本行的分布规律来修正框的倾斜角度。 4. 锚点生成:在某些情况下,文字检测可能会漏检或检测错误。为了解决这个问题,可以采用一些方法来生成针对漏检区域的额外锚点。这样,即便漏检了一些文字,也可以通过后续的处理来恢复或补充。 综上所述,PAN和DBNet的文字检测后处理过程包括去除重叠框、合并连接框、框的调整和修正以及锚点生成等环节。这些后处理步骤可以进一步提升文字检测的准确性和可用性,使得检测得到的文字结果更加准确、具体。

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论 1
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值