话不多说,直接上demo
Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");//读取图像
tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
api->Init(NULL, "eng");//初始化
api->SetImage(image);//载入图像
Boxa* boxes = api->GetComponentImages(tesseract::RIL_SYMBOL, true, NULL, NULL);//获得文本框坐标信息
printf("Found %d textline image components.\n", boxes->n);//输出文本框的数量
for (int i = 0; i < boxes->n; i++) {//将文本框逐个识别
BOX* box = boxaGetBox(boxes, i, L_CLONE);
api->SetRectangle(box->x, box->y, box->w, box->h);//将第i个文本框设置为识别区域,即只识别这一个小区域
char* ocrResult = api->GetUTF8Text();//获得识别结果
int conf = api->MeanTextConf();//获得置信度
fprintf(stdout, "Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s",
i, box->x, box->y, box->w, box->h, conf, ocrResult);//输出坐标框的信息
boxDestroy(&box);//销毁相关结构体
}
函数原型及其关键参数
// Prototype of the TessBaseAPI member used in the demo, quoted for reference.
// Returns a Boxa with one bounding box per component found at the requested level.
Boxa* GetComponentImages(const PageIteratorLevel level,  // segmentation granularity, see PageIteratorLevel
const bool text_only,  // presumably limits the result to text components - confirm in the Tesseract docs
Pixa** pixa, int** blockids)  // optional outputs (the demo passes NULL for both); presumably per-component images and block ids - confirm
根据PageIteratorLevel 参数可以选择分割到一块、一段、一行、一个单词或者一个单字。
// Granularity levels accepted as the `level` argument of GetComponentImages,
// listed from the coarsest unit (block) down to the finest (single symbol).
enum PageIteratorLevel {
RIL_BLOCK, // Block of text/image/separator line.
RIL_PARA, // Paragraph within a block.
RIL_TEXTLINE, // Line within a paragraph.
RIL_WORD, // Word within a textline.
RIL_SYMBOL // Symbol/character within a word.
};
相关结构体
/*! Array of Box pointers together with its element count, allocated capacity and reference count. */
struct Boxa
{
l_int32 n; /*!< number of box in ptr array */
l_int32 nalloc; /*!< number of box ptrs allocated */
l_uint32 refcount; /*!< reference count (1 if no clones) */
struct Box **box; /*!< box ptr array */
};
/*! Rectangle described by its left/top corner and its width/height, plus a reference count. */
struct Box
{
l_int32 x; /*!< left coordinate */
l_int32 y; /*!< top coordinate */
l_int32 w; /*!< box width */
l_int32 h; /*!< box height */
l_uint32 refcount; /*!< reference count (1 if no clones) */
};