使用动态库的调用方式:
1. 从 http://code.google.com/p/tesseract-ocr/downloads/list 下载 tesseract-3.02.02-win32-lib-include-dirs 和 leptonica-1.68-win32-lib-include-dirs。
2. 创建名为 TesseractSample 的工程,将 tesseract-3.02.02-win32-lib-include-dirs 里的 include 和 lib 文件夹拷贝到 TesseractSample 下,再将 leptonica-1.68-win32-lib-include-dirs 内 lib 文件夹中的 lept168d.lib 和 liblept168d.dll 拷贝到 TesseractSample 工程的 lib 文件夹下。
3. 配置工程
项目->属性->C/C++->附加包含目录:..\include\tesseract
项目->属性->链接器->附加库目录:..\lib
代码:
#include "strngs.h"
#include "baseapi.h"
#pragma comment(lib,"libtesseract302d.lib")
#include <iostream>
using namespace std;
#define FILEPATH "F:\\TesseractSample\\Debug\\eng.bmp"
#define DIRPATH "F:\\TesseractSample\\Debug"
//#define FILEPATH "newpic.tif"
string UTF8ToGBK(const std::string& strUTF8)
{
int len = MultiByteToWideChar(CP_UTF8, 0, strUTF8.c_str(), -1, NULL, 0);
unsigned short * wszGBK = new unsigned short[len + 1];
memset(wszGBK, 0, len * 2 + 2);
MultiByteToWideChar(CP_UTF8, 0,LPCSTR(strUTF8.c_str()), -1, LPWSTR(wszGBK), len);
len = WideCharToMultiByte(CP_ACP, 0,LPCWSTR(wszGBK), -1, NULL, 0, NULL, NULL);
char *szGBK = new char[len + 1];
memset(szGBK, 0, len + 1);
WideCharToMultiByte(CP_ACP,0, LPCWSTR(wszGBK), -1, szGBK, len, NULL, NULL);
//strUTF8 = szGBK;
std::string strTemp(szGBK);
delete[]szGBK;
delete[]wszGBK;
return strTemp;
}
/// Sample entry point: runs Tesseract OCR on the image at FILEPATH using the
/// English model and prints the recognised text twice, first as returned by
/// the engine (UTF-8) and then converted with UTF8ToGBK for the console.
///
/// @return 0 on success, 1 if the engine cannot be initialised or the image
///         cannot be processed.
int _tmain(int argc, _TCHAR* argv[])
{
    tesseract::TessBaseAPI api;

    // DIRPATH is the program's run directory. If this argument is NULL, the
    // TESSDATA_PREFIX environment variable must be set instead.
    // Language codes: "chi_sim" for Simplified Chinese, "eng" for English.
    // Init reports success with 0; running OCR on an engine that failed to
    // initialise is not meaningful, so stop here with a diagnostic.
    if (api.Init(DIRPATH, "eng", tesseract::OEM_DEFAULT) != 0)
    {
        std::cerr << "Failed to initialise Tesseract with data path " << DIRPATH << std::endl;
        return 1;
    }

    STRING text_out;
    if (!api.ProcessPages(FILEPATH, NULL, 0, &text_out))
    {
        // Previously this path exited silently with status 0.
        std::cerr << "Failed to process image " << FILEPATH << std::endl;
        return 1;
    }

    // Raw engine output (UTF-8), then the same text in the ANSI code page.
    std::cout << text_out.string() << std::endl;
    std::cout << UTF8ToGBK(text_out.string()).c_str() << std::endl;

    // Keep the console window open when launched from the IDE.
    system("pause");
    return 0;
}