C++opencv+Advancedeast文本位置检测

最新推荐文章于 2023-12-19 18:27:57 发布

maitd

最新推荐文章于 2023-12-19 18:27:57 发布

阅读量2.1k

点赞数 2

分类专栏：计算机视觉　文章标签：深度学习 opencv 文本检测 c++

本文链接：https://blog.csdn.net/qq_33243369/article/details/103636899

版权

计算机视觉专栏收录该内容

5 篇文章 1 订阅

订阅专栏

Advancedeast项目地址:https://github.com/huoyijie/AdvancedEAST
环境:VS2017+opencv4.1.2

运行结果如下:
在这里插入图片描述

基本步骤:

首先介绍一下与AdvancedEAST的使用相关的一些原理.AdvancedEAST的网络结构如下图:
在这里插入图片描述
图片输入网络后依次输出三组数据,简单使用的话可以只用第一个,我只用了第一个.
从网络结构也可以看出来有三个输出分支,第一个是score,可以通过score的值判断图片上的点是否属于文本区域,一般大于10的是文本的像素点(下面的代码中实际使用的阈值是15).score相当于该像素属于文本的置信度.
第二个输出是code,数据的第一位表示该像素是否属于边界像素,第二位表示该边界像素是头部的像素还是尾部的像素.
第三个输出是4位的geo,表示边界像素可以预测的2个顶点坐标.原文是这么说的,不过我没有搞清楚具体是做什么用的,希望有大佬可以告知.
使用opencv的readnet函数读取Advancedeast的模型,我用的是现成的模型,如果需要自己训练模型,可以参见原项目.

网络的输出是长和宽均为原图像四分之一的矩阵.假设我们读入的图片为640×640,则score为一个160×160的矩阵,矩阵中第i行第j列的score,代表原图中第4i行、第4j列的像素及其临近区域是否是文本区域.
具体代码如下:


#include "pch.h"
#include <algorithm>
#include <iostream>
#include <vector>
#include<opencv2/opencv.hpp>
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/imgproc/imgproc.hpp"

using namespace std;
using namespace cv;
using namespace cv::dnn;

// Forward declaration: decode() is defined after main() and is called from text_detect().
void decode(const Mat &scores, const Mat &geometry);

// Runs one forward pass of the network on srcImg and hands two of the raw
// output blobs to decode().
//
// srcImg        image to analyse; a deep copy is taken before it is used
// inpWidth      width the image is resized to when the input blob is built
// inpHeight     height the image is resized to when the input blob is built
// confThreshold accepted for interface compatibility; not used in this function
// nmsThreshold  accepted for interface compatibility; not used in this function
// net           loaded network to run
//
// Returns the deep copy of srcImg that was fed to the network.
Mat text_detect(Mat srcImg, int inpWidth, int inpHeight, float confThreshold, float nmsThreshold, Net net)
{
	// Layers to fetch; the blobs come back in this order.
	const std::vector<String> layerNames{
		"side_vertex_coord/convolution",
		"side_vertex_code/convolution",
		"inside_score/convolution",
		"east_detect/concat"
	};

	Mat frame = srcImg.clone();

	// Build the network input: resize to inpWidth x inpHeight, subtract the
	// per-channel mean, swap the R and B channels, do not crop.
	Mat blob;
	blobFromImage(frame, blob, 1.0, Size(inpWidth, inpHeight), Scalar(123.68, 116.78, 103.94), true, false);
	net.setInput(blob);

	std::vector<Mat> outputs;
	net.forward(outputs, layerNames);

	// outputs[3] ("east_detect/concat") is used as the score blob and
	// outputs[0] ("side_vertex_coord/convolution") as the geometry blob.
	decode(outputs[3], outputs[0]);

	return frame;
}

// Path of the model file passed to readNet().
const char* model = "K:\\UC下载\\east_model_3T256.pb";
//auto model = "./model/frozen_east_text_detection.pb";

// Path of the image to run detection on.
const char* detect_image = "J:\\MFCApplication1 - 副本\\Testimage\\001019080900028_0113_1.jpg";

// Size of the network input.
int inpWidth = 640;
int inpHeight = 640;

// Confidence threshold (forwarded to text_detect()).
double confThreshold = 0.5;

// Non-maximum-suppression threshold (forwarded to text_detect()).
double nmsThreshold = 0.1;

// Image shared between main() and decode().
Mat srcImg;

// 160x160 single-channel 8-bit mask: written by decode(), post-processed by main().
Mat heibai(160, 160, CV_8UC1);

//轮廓按照面积大小升序排序
bool ascendSort(vector<Point> a, vector<Point> b) {
	return a.size() < b.size();

}

//轮廓按照面积大小降序排序
bool descendSort(vector<Point> a, vector<Point> b) {

	return a.size() > b.size();
}

int main()
{
	//读取模型
	Net net = readNet(model);
	//读取检测图像
	//vector<String> layer_names = net.getLayerNames();
	//for (int i = 0; i < layer_names.size(); i++) {
	//	int id = net.getLayerId(layer_names[i]);
	//	auto layer = net.getLayer(id);
	//	printf("layer id:%d, type: %s, name:%s \n", id, layer->type.c_str(), layer->name.c_str());
	//}

	//system("pause");   // 用双引号，不要用单引号
	srcImg = imread(detect_image);
	int or_h = srcImg.rows;
	int or_w = srcImg.cols;
	resize(srcImg, srcImg, Size(640, 640));
	if (!srcImg.empty())
	{
		cout << "read image success!" << endl;
	}
	Mat resultImg = text_detect(srcImg, inpWidth, inpHeight, confThreshold, nmsThreshold, net);
	srcImg = imread(detect_image);
	//Mat element = getStructuringElement(MORPH_RECT, Size(3, 3));
	//dilate(heibai, heibai, element);

	vector< vector< Point> > contours;  //用于保存所有轮廓信息
	
	vector<Point> tempV;				//暂存的轮廓
	findContours(heibai, contours, RETR_EXTERNAL, CHAIN_APPROX_NONE);
	//轮廓按照面积大小进行升序排序
	sort(contours.begin(), contours.end(), descendSort);//升序排序
	vector<vector<Point> >::iterator itc = contours.begin();
	/*Mat element = getStructuringElement(MORPH_RECT, Size(3, 3));
	erode(heibai, heibai, element);*/
	while (itc != contours.end())
	{
		int y = itc->size();
		if (itc->size() < 29)
		{
		itc = contours.erase(itc);
		}

		else
		{
			++itc;
		}

	}

	//draw


	Mat B;
	heibai.copyTo(B);
	drawContours(B, contours, -1, Scalar(0, 0, 0), FILLED);
	heibai = heibai - B;
	findContours(heibai, contours, RETR_EXTERNAL, CHAIN_APPROX_SIMPLE, Point());
	heibai.copyTo(B);
	int i = 0;
	vector<vector<Point> >::iterator itr = contours.begin();
	int last_i = 0;
	for (int i = 0; i < contours.size(); i++)
	{
		Mat tmp(contours.at(i));
		Moments moment = moments(tmp, false);
		if (moment.m00 != 0)//除数不能为0
		{
			int x = cvRound(moment.m10 / moment.m00);//计算重心横坐标
			int y = cvRound(moment.m01 / moment.m00);//计算重心纵坐标
			if (x < 30 || y < 30||x>120||y>120)
			{
				vector< vector< Point> > contours2; //用于保存面积不足100的轮廓
				for (int j = last_i; j < i; j++)
				{
					++itr;
				}
				last_i = i;
				contours2.push_back(*itr);


				drawContours(heibai, contours2, -1, Scalar(0, 0, 0), FILLED);
			}
		}

	}

	//heibai =  B- heibai;
    Mat element = getStructuringElement(MORPH_RECT, Size(3, 3));
	dilate(heibai, heibai, element);
	findContours(heibai, contours, RETR_EXTERNAL, CHAIN_APPROX_NONE);
	Mat tmp1(contours.at(0));
	RotatedRect rect = minAreaRect(tmp1);
	Point2f fourPoint2f[4];
	rect.points(fourPoint2f);
	float ratio_h = (float)or_h / 640;
	float ratio_w = (float)or_w / 640;

	fourPoint2f[0].x = fourPoint2f[0].x * 4 * ratio_w;
	fourPoint2f[0].y = fourPoint2f[0].y * 4 * ratio_h;
	fourPoint2f[1].x = fourPoint2f[1].x * 4 * ratio_w;
	fourPoint2f[1].y = fourPoint2f[1].y * 4 * ratio_h;
	fourPoint2f[2].x = fourPoint2f[2].x * 4 * ratio_w;
	fourPoint2f[2].y = fourPoint2f[2].y * 4 * ratio_h;
	fourPoint2f[3].x = fourPoint2f[3].x * 4 * ratio_w;
	fourPoint2f[3].y = fourPoint2f[3].y * 4 * ratio_h;
	line(srcImg, fourPoint2f[0], fourPoint2f[1], Scalar(0, 255, 0), 2);
	line(srcImg, fourPoint2f[1], fourPoint2f[2], Scalar(0, 255, 0), 2);
	line(srcImg, fourPoint2f[2], fourPoint2f[3], Scalar(0, 255, 0), 2);
	line(srcImg, fourPoint2f[3], fourPoint2f[0], Scalar(0, 255, 0), 2);
	imshow("result", srcImg);
	waitKey();
	return 0;
}


void decode(const Mat &scores, const Mat &geometry)
{
	const int height = geometry.size[2];
	const int width = geometry.size[3];


	for (int y = 0; y < height; y++)
	{
		//识别概率
		const float *isinside= scores.ptr<float>(0, 0, y);
		const float *isbound = scores.ptr<float>(0, 1, y);
		const float *headorwei = scores.ptr<float>(0, 2, y);
		const float *x_1 = geometry.ptr<float>(0, 0, y);
		const float *y_1 = geometry.ptr<float>(0, 1, y);
		const float *x_2 = geometry.ptr<float>(0, 2, y);
		const float *y_2 = geometry.ptr<float>(0, 3, y);
		//遍历所有检测到的检测框
		for (int x = 0; x < width; x++)
		{
			float isinside_1 = isinside[x];
			float isbound_1 = isbound[x];
			float headorwei_1 = headorwei[x];
			
			float x_11 = x_1[x];
			float y_11 = y_1[x];
			float x_22 = x_2[x];
			float y_22 = y_2[x];

			float offsetX = x * 4.0f, offsetY = y * 4.0f;

			//低于阈值忽略该检测框
			if (isinside_1 <15)
			{
				heibai.at<uchar>(y, x) = 0;
				continue;
			}
			circle(srcImg,Point(offsetX, offsetY),2, Scalar(0, 255, 0),-1);
			heibai.at<uchar>(y, x)=255;
		
		}
	}
}

所用模型:
链接：https://pan.baidu.com/s/1EVdtCnY8lwylCgCqvnrwkw
提取码：ojge

maitd

关注

2
点赞
踩
9

收藏

觉得还不错? 一键收藏
11
评论
C++opencv+Advancedeast文本位置检测

项目地址:https://github.com/huoyijie/AdvancedEAST 环境:VS2017+opencv4.1.2 基本步骤:首先介绍一下与AdvancedEAST的使用相关的一些原理.AdvancedEAST的网络结构如下图:图片输入网络后依次输出三种数据,简单使用的话可以只用第一个,我只用了第一个.从网络结构也可以看出来有三个输出网络,第一个是score,可以...
复制链接

扫一扫