tesseract 样例

最新推荐文章于 2024-07-30 17:30:49 发布

Claroja

最新推荐文章于 2024-07-30 17:30:49 发布

阅读量919

点赞数 1

分类专栏：图像识别 | 文章标签：图像识别

图像识别专栏收录该内容

94 篇文章 2 订阅

订阅专栏

1.基础样例

#include <tesseract/baseapi.h>
#include <leptonica/allheaders.h>

int main()
{
    char *outText;

    tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
    // 初始化tesseract-ocr，使用英语，不指定数据路径
    if (api->Init(NULL, "eng")) {
        fprintf(stderr, "Could not initialize tesseract.\n");
        exit(1);
    }

    // 使用 leptonica library 打开图片
    Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
    api->SetImage(image);
    // 获得最终的结果
    outText = api->GetUTF8Text();
    printf("OCR output:\n%s", outText);

    // 删除对象释放内存
    api->End();
    delete [] outText;
    pixDestroy(&image);

    return 0;
}

2.获得元素图片样例

  Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
  tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
  api->Init(NULL, "eng"); //初始化tesseract
  api->SetImage(image); //获得图像
  Boxa* boxes = api->GetComponentImages(tesseract::RIL_TEXTLINE, true, NULL, NULL); //获得元素图像
  printf("Found %d textline image components.\n", boxes->n);
  for (int i = 0; i < boxes->n; i++) {
    BOX* box = boxaGetBox(boxes, i, L_CLONE);
    api->SetRectangle(box->x, box->y, box->w, box->h); //获得元素的位置
    char* ocrResult = api->GetUTF8Text();  //获得对应文字
    int conf = api->MeanTextConf();
    fprintf(stdout, "Box[%d]: x=%d, y=%d, w=%d, h=%d, confidence: %d, text: %s",
                    i, box->x, box->y, box->w, box->h, conf, ocrResult);
  }

以上是获得元素图片的基本方法，下面分析它所用到的一些方法。

// Returns the boxes of the page components found at the requested level.
//   level     - segmentation granularity (one of the RIL_* values)
//   text_only - when true, only text regions are returned, not image regions
//   pixa      - receives the segmented component images; NULL means "do not
//               return images"
//   blockids  - receives id numbers for the returned components (presumably
//               block ids, judging by the name - confirm against baseapi.h)
Boxa* GetComponentImages(const PageIteratorLevel level,
                           const bool text_only,
                           Pixa** pixa, int** blockids)

GetComponentImages方法有四个参数，第一个参数设置分割的等级，它总共有5个选项。

选项	描述	原文
RIL_BLOCK	按块分	Block of text/image/separator line
RIL_PARA	按段分	Paragraph within a block
RIL_TEXTLINE	按行分	Line within a paragraph
RIL_WORD	按单词分	Word within a textline
RIL_SYMBOL	按字母分	Symbol/character within a word

通过选择不同的分割等级，可以把图像区域和不同层级（块、段、行、单词、字符）的文字区分开来，然后再分别进行处理。

第二个参数text_only为真，则只返回文字区域坐标，不返回图像区域坐标。
第三个参数pixa用于返回分割出来的各个元素图像，传入NULL表示不返回图像。
第四个参数blockids用于返回每个元素所属块的序号（block id），传入NULL表示不返回。

该方法的返回值是一个指向Boxa结构体（矩形数组）的指针，Box和Boxa的定义如下：

/* A single rectangle record with a reference count. The example code passes
 * x, y, w, h to SetRectangle and prints them as "x=, y=, w=, h=". */
struct Box
{
    l_int32            x;             /* presumably the left coordinate - confirm  */
    l_int32            y;             /* presumably the top coordinate - confirm   */
    l_int32            w;             /* presumably the width - confirm            */
    l_int32            h;             /* presumably the height - confirm           */
    l_uint32           refcount;      /* reference count (1 if no clones)  */
 
};
typedef struct Box    BOX;
 
/* An array of Box pointers together with its bookkeeping counters. */
struct Boxa
{
    l_int32            n;             /* number of box in ptr array        */
    l_int32            nalloc;        /* number of box ptrs allocated      */
    l_uint32           refcount;      /* reference count (1 if no clones)  */
    struct Box       **box;           /* box ptr array                     */
};
typedef struct Boxa  BOXA;

boxaGetBox用于获得矩形数组中的某个矩形：L_CLONE返回指向同一个Box的句柄（只增加引用计数），L_COPY返回一份新的拷贝；两种方式得到的Box在用完后都需要调用boxDestroy释放。
MeanTextConf返回当前识别区域内所有文字的平均置信度，取值范围为0到100。

3.结果迭代样例

Pix *image = pixRead("/usr/src/tesseract/testing/phototest.tif");
  tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
  api->Init(NULL, "eng");  //初始化
  api->SetImage(image);  //读取图片
  api->Recognize(0);
  tesseract::ResultIterator* ri = api->GetIterator();
  tesseract::PageIteratorLevel level = tesseract::RIL_WORD;
  if (ri != 0) {
    do {
      const char* word = ri->GetUTF8Text(level);
      float conf = ri->Confidence(level);
      int x1, y1, x2, y2;
      ri->BoundingBox(level, &x1, &y1, &x2, &y2);
      printf("word: '%s';  \tconf: %.2f; BoundingBox: %d,%d,%d,%d;\n",
               word, conf, x1, y1, x2, y2);
      delete[] word;
    } while (ri->Next(level));
  }

4. osd样例

  const char* inputfile = "/usr/src/tesseract/testing/eurotext.tif";
  tesseract::Orientation orientation;
  tesseract::WritingDirection direction;
  tesseract::TextlineOrder order;
  float deskew_angle;

  PIX *image = pixRead(inputfile);
  tesseract::TessBaseAPI *api = new tesseract::TessBaseAPI();
  api->Init("/usr/src/tesseract/", "eng");
  api->SetPageSegMode(tesseract::PSM_AUTO_OSD);
  api->SetImage(image);
  api->Recognize(0);

  tesseract::PageIterator* it =  api->AnalyseLayout();
  it->Orientation(&orientation, &direction, &order, &deskew_angle);
  printf("Orientation: %d;\nWritingDirection: %d\nTextlineOrder: %d\n" \
         "Deskew angle: %.4f\n",
         orientation, direction, order, deskew_angle);

OSD（Orientation and Script Detection）用于检测页面方向和文字方向；其中文字方向检测主要是判断文字的书写（阅读）方向以及文本行的排列顺序。
SetPageSegMode用于设置页面分割模式，可选的模式如下：

/// Page segmentation modes. Values are listed explicitly and are the
/// same ones the enumerators receive from implicit numbering.
enum PageSegMode {
  /// Orientation and script detection only.
  PSM_OSD_ONLY = 0,
  /// Automatic page segmentation with orientation and script detection (OSD).
  PSM_AUTO_OSD = 1,
  /// Automatic page segmentation, but no OSD, or OCR.
  PSM_AUTO_ONLY = 2,
  /// Fully automatic page segmentation, but no OSD.
  PSM_AUTO = 3,
  /// Assume a single column of text of variable sizes.
  PSM_SINGLE_COLUMN = 4,
  /// Assume a single uniform block of vertically aligned text.
  PSM_SINGLE_BLOCK_VERT_TEXT = 5,
  /// Assume a single uniform block of text. (Default.)
  PSM_SINGLE_BLOCK = 6,
  /// Treat the image as a single text line.
  PSM_SINGLE_LINE = 7,
  /// Treat the image as a single word.
  PSM_SINGLE_WORD = 8,
  /// Treat the image as a single word in a circle.
  PSM_CIRCLE_WORD = 9,
  /// Treat the image as a single character.
  PSM_SINGLE_CHAR = 10,
  /// Find as much text as possible in no particular order.
  PSM_SPARSE_TEXT = 11,
  /// Sparse text with orientation and script detection.
  PSM_SPARSE_TEXT_OSD = 12,
  /// Treat the image as a single text line, bypassing hacks that are
  /// Tesseract-specific.
  PSM_RAW_LINE = 13,

  /// Number of enum entries.
  PSM_COUNT = 14
};

参考文献:
https://github.com/tesseract-ocr/tesseract/wiki/APIExample