1.软件包准备
- opencv源码包地址: 官网 github
- opencv_contrib源码包地址: github
- Tesseract源码包地址: github
- cmake.exe 下载地址: 官网
- qt 下载地址: 官网
注意: opencv和opencv_contrib包的版本号要一致(比如都是3.4.0)
Tesseract源码安装参考: Win10 使用MinGW-w64编译Tesseract4.0
2. 在环境变量PATH中添加:
C:\Qt\Qt5.9.0\5.9\mingw53_32\bin C:\Qt\Qt5.9.0\Tools\mingw530_32\bin
一方面方便日后在cmd中直接使用gcc、g++,qmake和mingw32-make
另一方面,方便下一步cmake查找Qt相关配置
3. 使用cmake生成解决方案
如果提示:
直接将 "CMAKE_SH"项删除即可。
修改配置如下:
- CMAKE_BUILD_TYPE: Debug或者Release
- CMAKE_INSTALL_PREFIX: 指定程序安装位置
- ENABLE_CXX11: 支持C++11特性
- WITH_QT
- WITH_OPENGL
- OPENCV_EXTRA_MODULES_PATH: 若使用opencv_contrib模块,则在此处填写解压后的路径,如 F:\opencv_contrib\modules\
- Tesseract_INCLUDE_DIR: Tesseract头文件所在路径
- Tesseract_LIBRARY: Tesseract lib文件所在路径
- Lept_LIBRARY: leptonica lib文件所在路径 (很重要, 一定要配置,否则可能找不到Tesseract)
建议取消勾选:
- BUILD_DOCS :生成文档,需要安装Doxygen。官网提供了在线文档和离线文档。
- BUILD_PERF_TESTS: 性能测试相关
- BUILD_TESTS: 测试相关
- BUILD_opencv_ts :一些单元测试代码。
- INSTALL_TESTS :与开发无关。
配置截图:
Tesseract 相关:
Qt 相关:
# 模块相关配置 OpenCV modules: To be built: aruco bgsegm bioinspired calib3d ccalib core datasets dnn dpm face features2d flann fuzzy highgui img_hash imgcodecs imgproc line_descriptor ml objdetect optflow phase_unwrapping photo plot reg rgbd saliency shape stereo stitching structured_light superres surface_matching text tracking video videoio videostab world xfeatures2d ximgproc xobjdetect xphoto Disabled: js python2 python_bindings_generator cvv Disabled by dependency: - Unavailable: cnn_3dobj cudaarithm cudabgsegm cudacodec cudafeatures2d cudafilters cudaimgproc cudalegacy cudaobjdetect cudaoptflow cudastereo cudawarping cudev dnn_modern freetype hdf java matlab ovis python3 python3 sfm ts viz Applications: apps Documentation: NO Non-free algorithms: NO
BUILD_opencv_world : 将.lib或者.dll文件统一整合进一个world文件中,方便使用。但若想只使用一部分模块可不勾选以减少体积
重要:
如果勾选BUILD_opencv_world, 就需要取消勾选BUILD_opencv_cvv,否则会出现以下错误
4.安装:
5. 遇到的问题:
1) 'sprintf_instead_use_StringCbPrintfA_or_StringCchPrintfA' was not declared in this scope .
解决方法: 修改opencv源码目录中modules\videoio\src\cap_dshow.cpp, 找到#include "DShow.h",然后在其上面添加一行
release版本:
#define NO_DSHOW_STRSAFE
debug版本:
#define STRSAFE_NO_DEPRECATE
如下图:
2)
解决方法:
关闭预编译头, 取消勾选"ENABLE_PRECOMPILED_HEADERS"
参考: https://wiki.qt.io/How_to_setup_Qt_and_openCV_on_Windows
6.目录结构
7. 向环境变量PATH中加入dll所在路径
D:\opencv\x86\mingw\bin
8. 测试
打开Qtcreator,创建一个c++项目。
测试1:
代码:
#include <opencv2/opencv.hpp>

#include <iostream>

using namespace cv;

// Smoke test for the OpenCV build: loads "lena.png" (path relative to the
// working directory), shows it in a window named "Image", waits for a key
// press and then closes the window.
// Returns 0 on success, 1 when the image could not be loaded.
int main()
{
    Mat im = imread("lena.png");
    if (im.empty())
    {
        // imread() yields an empty Mat when the file is missing or cannot be
        // decoded; handing that to imshow() would fail inside OpenCV.
        std::cerr << "Could not load lena.png" << std::endl;
        return 1;
    }

    namedWindow("Image");
    imshow("Image", im);
    waitKey(0);
    destroyWindow("Image");
    return 0;
}
.pro配置文件:
# qmake project for the OpenCV smoke test: a plain console application
# that does not link against any Qt module.
TEMPLATE = app
CONFIG += console c++11
CONFIG -= app_bundle
CONFIG -= qt

SOURCES += main.cpp

# Paths use forward slashes; qmake warns about unescaped backslashes.
INCLUDEPATH += D:/opencv/include

CONFIG(debug, debug|release) {
    # Debug build: link directly against the debug "world" DLL.
    LIBS += D:/opencv/x86/mingw/bin/libopencv_world340d.dll
} else {
    # Release build: link against opencv_world340 from the lib directory.
    LIBS += -LD:/opencv/x86/mingw/lib -lopencv_world340
}
效果:
测试2:
1 /* 2 * textdetection.cpp 3 * 4 * A demo program of End-to-end Scene Text Detection and Recognition: 5 * Shows the use of the Tesseract OCR API with the Extremal Region Filter algorithm described in: 6 * Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012 7 * 8 * Created on: Jul 31, 2014 9 * Author: Lluis Gomez i Bigorda <lgomez AT cvc.uab.es> 10 */ 11 12 #include "opencv2/text.hpp" 13 #include "opencv2/core/utility.hpp" 14 #include "opencv2/highgui.hpp" 15 #include "opencv2/imgproc.hpp" 16 17 #include <iostream> 18 19 using namespace std; 20 using namespace cv; 21 using namespace cv::text; 22 23 //Calculate edit distance between two words 24 size_t edit_distance(const string& A, const string& B); 25 size_t min(size_t x, size_t y, size_t z); 26 bool isRepetitive(const string& s); 27 bool sort_by_lenght(const string &a, const string &b); 28 //Draw ER's in an image via floodFill 29 void er_draw(vector<Mat> &channels, vector<vector<ERStat> > ®ions, vector<Vec2i> group, Mat& segmentation); 30 31 //Perform text detection and recognition and evaluate results using edit distance 32 int main1(int argc, char* argv[]) 33 { 34 cout << endl << argv[0] << endl << endl; 35 cout << "A demo program of End-to-end Scene Text Detection and Recognition: " << endl; 36 cout << "Shows the use of the Tesseract OCR API with the Extremal Region Filter algorithm described in:" << endl; 37 cout << "Neumann L., Matas J.: Real-Time Scene Text Localization and Recognition, CVPR 2012" << endl << endl; 38 39 Mat image; 40 41 if(argc>1) 42 image = imread(argv[1]); 43 else 44 { 45 cout << " Usage: " << argv[0] << " <input_image> [<gt_word1> ... 
<gt_wordN>]" << endl; 46 return(0); 47 } 48 49 cout << "IMG_W=" << image.cols << endl; 50 cout << "IMG_H=" << image.rows << endl; 51 52 /*Text Detection*/ 53 54 // Extract channels to be processed individually 55 vector<Mat> channels; 56 57 Mat grey; 58 cvtColor(image,grey,COLOR_RGB2GRAY); 59 60 // Notice here we are only using grey channel, see textdetection.cpp for example with more channels 61 channels.push_back(grey); 62 channels.push_back(255-grey); 63 64 double t_d = (double)getTickCount(); 65 // Create ERFilter objects with the 1st and 2nd stage default classifiers 66 Ptr<ERFilter> er_filter1 = createERFilterNM1(loadClassifierNM1("trained_classifierNM1.xml"),8,0.00015f,0.13f,0.2f,true,0.1f); 67 Ptr<ERFilter> er_filter2 = createERFilterNM2(loadClassifierNM2("trained_classifierNM2.xml"),0.5); 68 69 vector<vector<ERStat> > regions(channels.size()); 70 // Apply the default cascade classifier to each independent channel (could be done in parallel) 71 for (int c=0; c<(int)channels.size(); c++) 72 { 73 er_filter1->run(channels[c], regions[c]); 74 er_filter2->run(channels[c], regions[c]); 75 } 76 cout << "TIME_REGION_DETECTION = " << ((double)getTickCount() - t_d)*1000/getTickFrequency() << endl; 77 78 Mat out_img_decomposition= Mat::zeros(image.rows+2, image.cols+2, CV_8UC1); 79 vector<Vec2i> tmp_group; 80 for (int i=0; i<(int)regions.size(); i++) 81 { 82 for (int j=0; j<(int)regions[i].size();j++) 83 { 84 tmp_group.push_back(Vec2i(i,j)); 85 } 86 Mat tmp= Mat::zeros(image.rows+2, image.cols+2, CV_8UC1); 87 er_draw(channels, regions, tmp_group, tmp); 88 if (i > 0) 89 tmp = tmp / 2; 90 out_img_decomposition = out_img_decomposition | tmp; 91 tmp_group.clear(); 92 } 93 94 double t_g = (double)getTickCount(); 95 // Detect character groups 96 vector< vector<Vec2i> > nm_region_groups; 97 vector<Rect> nm_boxes; 98 erGrouping(image, channels, regions, nm_region_groups, nm_boxes,ERGROUPING_ORIENTATION_HORIZ); 99 cout << "TIME_GROUPING = " << ((double)getTickCount() - 
t_g)*1000/getTickFrequency() << endl; 100 101 102 103 /*Text Recognition (OCR)*/ 104 105 double t_r = (double)getTickCount(); 106 Ptr<OCRTesseract> ocr = OCRTesseract::create(); 107 cout << "TIME_OCR_INITIALIZATION = " << ((double)getTickCount() - t_r)*1000/getTickFrequency() << endl; 108 string output; 109 110 Mat out_img; 111 Mat out_img_detection; 112 Mat out_img_segmentation = Mat::zeros(image.rows+2, image.cols+2, CV_8UC1); 113 image.copyTo(out_img); 114 image.copyTo(out_img_detection); 115 float scale_img = 600.f/image.rows; 116 float scale_font = (float)(2-scale_img)/1.4f; 117 vector<string> words_detection; 118 119 t_r = (double)getTickCount(); 120 121 for (int i=0; i<(int)nm_boxes.size(); i++) 122 { 123 124 rectangle(out_img_detection, nm_boxes[i].tl(), nm_boxes[i].br(), Scalar(0,255,255), 3); 125 126 Mat group_img = Mat::zeros(image.rows+2, image.cols+2, CV_8UC1); 127 er_draw(channels, regions, nm_region_groups[i], group_img); 128 Mat group_segmentation; 129 group_img.copyTo(group_segmentation); 130 //image(nm_boxes[i]).copyTo(group_img); 131 group_img(nm_boxes[i]).copyTo(group_img); 132 copyMakeBorder(group_img,group_img,15,15,15,15,BORDER_CONSTANT,Scalar(0)); 133 134 vector<Rect> boxes; 135 vector<string> words; 136 vector<float> confidences; 137 ocr->run(group_img, output, &boxes, &words, &confidences, OCR_LEVEL_WORD); 138 139 output.erase(remove(output.begin(), output.end(), '\n'), output.end()); 140 //cout << "OCR output = \"" << output << "\" length = " << output.size() << endl; 141 if (output.size() < 3) 142 continue; 143 144 for (int j=0; j<(int)boxes.size(); j++) 145 { 146 boxes[j].x += nm_boxes[i].x-15; 147 boxes[j].y += nm_boxes[i].y-15; 148 149 //cout << " word = " << words[j] << "\t confidence = " << confidences[j] << endl; 150 if ((words[j].size() < 2) || (confidences[j] < 51) || 151 ((words[j].size()==2) && (words[j][0] == words[j][1])) || 152 ((words[j].size()< 4) && (confidences[j] < 60)) || 153 isRepetitive(words[j])) 154 continue; 155 
words_detection.push_back(words[j]); 156 rectangle(out_img, boxes[j].tl(), boxes[j].br(), Scalar(255,0,255),3); 157 Size word_size = getTextSize(words[j], FONT_HERSHEY_SIMPLEX, (double)scale_font, (int)(3*scale_font), NULL); 158 rectangle(out_img, boxes[j].tl()-Point(3,word_size.height+3), boxes[j].tl()+Point(word_size.width,0), Scalar(255,0,255),-1); 159 putText(out_img, words[j], boxes[j].tl()-Point(1,1), FONT_HERSHEY_SIMPLEX, scale_font, Scalar(255,255,255),(int)(3*scale_font)); 160 out_img_segmentation = out_img_segmentation | group_segmentation; 161 } 162 163 } 164 165 cout << "TIME_OCR = " << ((double)getTickCount() - t_r)*1000/getTickFrequency() << endl; 166 167 168 /* Recognition evaluation with (approximate) Hungarian matching and edit distances */ 169 170 if(argc>2) 171 { 172 int num_gt_characters = 0; 173 vector<string> words_gt; 174 for (int i=2; i<argc; i++) 175 { 176 string s = string(argv[i]); 177 if (s.size() > 0) 178 { 179 words_gt.push_back(string(argv[i])); 180 //cout << " GT word " << words_gt[words_gt.size()-1] << endl; 181 num_gt_characters += (int)(words_gt[words_gt.size()-1].size()); 182 } 183 } 184 185 if (words_detection.empty()) 186 { 187 //cout << endl << "number of characters in gt = " << num_gt_characters << endl; 188 cout << "TOTAL_EDIT_DISTANCE = " << num_gt_characters << endl; 189 cout << "EDIT_DISTANCE_RATIO = 1" << endl; 190 } 191 else 192 { 193 194 sort(words_gt.begin(),words_gt.end(),sort_by_lenght); 195 196 int max_dist=0; 197 vector< vector<int> > assignment_mat; 198 for (int i=0; i<(int)words_gt.size(); i++) 199 { 200 vector<int> assignment_row(words_detection.size(),0); 201 assignment_mat.push_back(assignment_row); 202 for (int j=0; j<(int)words_detection.size(); j++) 203 { 204 assignment_mat[i][j] = (int)(edit_distance(words_gt[i],words_detection[j])); 205 max_dist = max(max_dist,assignment_mat[i][j]); 206 } 207 } 208 209 vector<int> words_detection_matched; 210 211 int total_edit_distance = 0; 212 int tp=0, fp=0, fn=0; 213 
for (int search_dist=0; search_dist<=max_dist; search_dist++) 214 { 215 for (int i=0; i<(int)assignment_mat.size(); i++) 216 { 217 int min_dist_idx = (int)distance(assignment_mat[i].begin(), 218 min_element(assignment_mat[i].begin(),assignment_mat[i].end())); 219 if (assignment_mat[i][min_dist_idx] == search_dist) 220 { 221 //cout << " GT word \"" << words_gt[i] << "\" best match \"" << words_detection[min_dist_idx] << "\" with dist " << assignment_mat[i][min_dist_idx] << endl; 222 if(search_dist == 0) 223 tp++; 224 else { fp++; fn++; } 225 226 total_edit_distance += assignment_mat[i][min_dist_idx]; 227 words_detection_matched.push_back(min_dist_idx); 228 words_gt.erase(words_gt.begin()+i); 229 assignment_mat.erase(assignment_mat.begin()+i); 230 for (int j=0; j<(int)assignment_mat.size(); j++) 231 { 232 assignment_mat[j][min_dist_idx]=INT_MAX; 233 } 234 i--; 235 } 236 } 237 } 238 239 for (int j=0; j<(int)words_gt.size(); j++) 240 { 241 //cout << " GT word \"" << words_gt[j] << "\" no match found" << endl; 242 fn++; 243 total_edit_distance += (int)words_gt[j].size(); 244 } 245 for (int j=0; j<(int)words_detection.size(); j++) 246 { 247 if (find(words_detection_matched.begin(),words_detection_matched.end(),j) == words_detection_matched.end()) 248 { 249 //cout << " Detection word \"" << words_detection[j] << "\" no match found" << endl; 250 fp++; 251 total_edit_distance += (int)words_detection[j].size(); 252 } 253 } 254 255 256 //cout << endl << "number of characters in gt = " << num_gt_characters << endl; 257 cout << "TOTAL_EDIT_DISTANCE = " << total_edit_distance << endl; 258 cout << "EDIT_DISTANCE_RATIO = " << (float)total_edit_distance / num_gt_characters << endl; 259 cout << "TP = " << tp << endl; 260 cout << "FP = " << fp << endl; 261 cout << "FN = " << fn << endl; 262 } 263 } 264 265 266 267 //resize(out_img_detection,out_img_detection,Size(image.cols*scale_img,image.rows*scale_img),0,0,INTER_LINEAR_EXACT); 268 //imshow("detection", out_img_detection); 269 
//imwrite("detection.jpg", out_img_detection); 270 //resize(out_img,out_img,Size(image.cols*scale_img,image.rows*scale_img),0,0,INTER_LINEAR_EXACT); 271 namedWindow("recognition",WINDOW_NORMAL); 272 imshow("recognition", out_img); 273 waitKey(0); 274 //imwrite("recognition.jpg", out_img); 275 //imwrite("segmentation.jpg", out_img_segmentation); 276 //imwrite("decomposition.jpg", out_img_decomposition); 277 278 return 0; 279 } 280 281 size_t min(size_t x, size_t y, size_t z) 282 { 283 return x < y ? min(x,z) : min(y,z); 284 } 285 286 size_t edit_distance(const string& A, const string& B) 287 { 288 size_t NA = A.size(); 289 size_t NB = B.size(); 290 291 vector< vector<size_t> > M(NA + 1, vector<size_t>(NB + 1)); 292 293 for (size_t a = 0; a <= NA; ++a) 294 M[a][0] = a; 295 296 for (size_t b = 0; b <= NB; ++b) 297 M[0][b] = b; 298 299 for (size_t a = 1; a <= NA; ++a) 300 for (size_t b = 1; b <= NB; ++b) 301 { 302 size_t x = M[a-1][b] + 1; 303 size_t y = M[a][b-1] + 1; 304 size_t z = M[a-1][b-1] + (A[a-1] == B[b-1] ? 
0 : 1); 305 M[a][b] = min(x,y,z); 306 } 307 308 return M[A.size()][B.size()]; 309 } 310 311 bool isRepetitive(const string& s) 312 { 313 int count = 0; 314 for (int i=0; i<(int)s.size(); i++) 315 { 316 if ((s[i] == 'i') || 317 (s[i] == 'l') || 318 (s[i] == 'I')) 319 count++; 320 } 321 if (count > ((int)s.size()+1)/2) 322 { 323 return true; 324 } 325 return false; 326 } 327 328 329 void er_draw(vector<Mat> &channels, vector<vector<ERStat> > ®ions, vector<Vec2i> group, Mat& segmentation) 330 { 331 for (int r=0; r<(int)group.size(); r++) 332 { 333 ERStat er = regions[group[r][0]][group[r][1]]; 334 if (er.parent != NULL) // deprecate the root region 335 { 336 int newMaskVal = 255; 337 int flags = 4 + (newMaskVal << 8) + FLOODFILL_FIXED_RANGE + FLOODFILL_MASK_ONLY; 338 floodFill(channels[group[r][0]],segmentation,Point(er.pixel%channels[group[r][0]].cols,er.pixel/channels[group[r][0]].cols), 339 Scalar(255),0,Scalar(er.level),Scalar(0),flags); 340 } 341 } 342 } 343 344 bool sort_by_lenght(const string &a, const string &b){return (a.size()>b.size());}
测试用到的文件end_to_end_recognition.cpp、scenetext01.jpg、trained_classifierNM1.xml、trained_classifierNM2.xml都位于opencv_contrib源码包目录下的modules\text\samples中
效果: