1.基础样例
#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>
int main()
{
char *outText;
tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
// 初始化tesseract-ocr,使用英语,不指定数据路径
if (api->Init(NULL, "eng")) {
fprintf(stderr, "Could not initialize tesseract.\n");
exit(1);
}
// 使用 leptonica library 打开图片
Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
api->SetImage(image);
// 获得最终的结果
outText = api->GetUTF8Text();
printf("OCR output:\n%s", outText);
// 删除对象释放内存
api->End();
delete [] outText;
pixDestroy(&image);
return 0;
}
2.获得元素图片样例
Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
api->Init(NULL, "eng"); //初始化tesseract
api->SetImage(image); //获得图像
Boxa* boxes = api->GetComponentImages(tesseract::RIL_TEXTLINE, true, NULL, NULL); //获得元素图像
printf("Found %d textline image components.\n", boxes->n);
for (int i = 0; i < boxes->n; i++) {
BOX* box = boxaGetBox(boxes, i, L_CLONE);
api->SetRectangle(box->x, box->y, box->w, box->h); //获得元素的位置
char* ocrResult = api->GetUTF8Text(); //获得对应文字
int conf = api->MeanTextConf();
fprintf(stdout, "Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s",
i, box->x, box->y, box->w, box->h, conf, ocrResult);
}
以上是获得元素图片的基本方法,下面分析它所用到的一些方法。
Boxa* GetComponentImages(const PageIteratorLevel level,
const bool text_only,
Pixa** pixa, int** blockids)
GetComponentImages
方法有四个参数,第一个参数设置分割的等级,它总共有5个选项。
选项 | 描述 | 原文 |
---|---|---|
RIL_BLOCK | 按块分 | Block of text/image/separator line |
RIL_PARA | 按段分 | Paragraph within a block |
RIL_TEXTLINE | 按行分 | Line within a paragraph |
RIL_WORD | 按单词分 | Word within a textline |
RIL_SYMBOL | 按字母分 | Symbol/character within a word |
这些参数的好处是可以将图像,不同类别的文字区别开,然后再进行处理。
第二个参数text_only为真,则只返回文字区域坐标,不返回图像区域坐标。
第三个参数pixa用于返回分割出来的图像,Null表示不返回图像。
第四个参数blockids返回序列号
该方法的返回值是一个结构体
struct Box
{
l_int32 x;
l_int32 y;
l_int32 w;
l_int32 h;
l_uint32 refcount; /* reference count (1 if no clones) */
};
typedef struct Box BOX;
struct Boxa
{
l_int32 n; /* number of box in ptr array */
l_int32 nalloc; /* number of box ptrs allocated */
l_uint32 refcount; /* reference count (1 if no clones) */
struct Box **box; /* box ptr array */
};
typedef struct Boxa BOXA;
boxaGetBox
是获得矩形数组中的某个矩形,L_CLONE是软拷贝,L_COPY是硬拷贝
MeanTextConf
用于返回置信度
3.结果迭代样例
Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
api->Init(NULL, "eng"); //初始化
api->SetImage(image); //读取图片
api->Recognize(0);
tesseract::ResultIterator* ri = api->GetIterator();
tesseract::PageIteratorLevel level = tesseract::RIL_WORD;
if (ri != 0) {
do {
const char* word = ri->GetUTF8Text(level);
float conf = ri->Confidence(level);
int x1, y1, x2, y2;
ri->BoundingBox(level, &x1, &y1, &x2, &y2);
printf("word: '%s'; \tconf: %.2f; BoundingBox: %d,%d,%d,%d;\n",
word, conf, x1, y1, x2, y2);
delete[] word;
} while (ri->Next(level));
}
4. osd样例
const char* inputfile = "/usr/src/tesseract/testing/eurotext.tif";
tesseract::Orientation orientation;
tesseract::WritingDirection direction;
tesseract::TextlineOrder order;
float deskew_angle;
PIX *image = pixRead(inputfile);
tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
api->Init("/usr/src/tesseract/", "eng");
api->SetPageSegMode(tesseract::PSM_AUTO_OSD);
api->SetImage(image);
api->Recognize(0);
tesseract::PageIterator* it = api->AnalyseLayout();
it->Orientation(&orientation, &direction, &order, &deskew_angle);
printf("Orientation: %d;\nWritingDirection: %d\nTextlineOrder: %d\n" \
"Deskew angle: %.4f\n",
orientation, direction, order, deskew_angle);
osd进行页面的方向检测和文字的方向检测,文字方向检测主要是看文字的阅读方向。
SetPageSegMode
设置页面分割模式。
enum PageSegMode {
PSM_OSD_ONLY, ///< Orientation and script detection only.
PSM_AUTO_OSD, ///< Automatic page segmentation with orientation and
///< script detection. (OSD)
PSM_AUTO_ONLY, ///< Automatic page segmentation, but no OSD, or OCR.
PSM_AUTO, ///< Fully automatic page segmentation, but no OSD.
PSM_SINGLE_COLUMN, ///< Assume a single column of text of variable sizes.
PSM_SINGLE_BLOCK_VERT_TEXT, ///< Assume a single uniform block of vertically
///< aligned text.
PSM_SINGLE_BLOCK, ///< Assume a single uniform block of text. (Default.)
PSM_SINGLE_LINE, ///< Treat the image as a single text line.
PSM_SINGLE_WORD, ///< Treat the image as a single word.
PSM_CIRCLE_WORD, ///< Treat the image as a single word in a circle.
PSM_SINGLE_CHAR, ///< Treat the image as a single character.
PSM_SPARSE_TEXT, ///< Find as much text as possible in no particular order.
PSM_SPARSE_TEXT_OSD, ///< Sparse text with orientation and script det.
PSM_RAW_LINE, ///< Treat the image as a single text line, bypassing
///< hacks that are Tesseract-specific.
PSM_COUNT ///< Number of enum entries.
};
参考文献:
https://github.com/tesseract-ocr/tesseract/wiki/APIExample