1、引入依赖
<dependency>
<groupId>org.bytedeco</groupId>
<artifactId>javacv-platform</artifactId>
<version>1.5.8</version>
</dependency>
2、下载语言包
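从 GitHub 仓库 tesseract-ocr/tessdata（https://github.com/tesseract-ocr/tessdata）下载语言包：Trained models with support for legacy and LSTM OCR engine。下载并解压后，将包含 .traineddata 文件（例如 chi_sim.traineddata）的目录作为下文代码中的 dataPath 参数传入。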
3、代码
/**
 * Recognizes the text contained in an image using Tesseract.
 *
 * <p>All native resources (the Tesseract engine, the Leptonica image and the
 * result buffer) are released before this method returns, on both the
 * success and the failure paths.
 *
 * @param language Tesseract language code passed to {@code Init}, e.g. {@code "chi_sim"}
 * @param dataPath directory passed to {@code Init} as the tessdata location
 * @param imageUrl path of the image file to read (despite the name, it is
 *                 handed to {@code pixRead} as a local file path)
 * @return the recognized text, or {@code null} if Tesseract could not be
 *         initialized, the image could not be read, or no text result was produced
 */
public static String ocr(String language,String dataPath, String imageUrl) {
    TessBaseAPI api = new TessBaseAPI();
    PIX image = null;
    BytePointer outText = null;
    try {
        // Initialize tesseract-ocr with the requested language and tessdata directory
        if (api.Init(dataPath, language) != 0) {
            System.err.println("Could not initialize tesseract.");
            return null;
        }
        // Open input image with leptonica library; a null result means the
        // file could not be loaded, so it must not be handed to SetImage.
        image = pixRead(imageUrl);
        if (image == null) {
            System.err.println("Could not read image: " + imageUrl);
            return null;
        }
        api.SetImage(image);
        // Get OCR result; guard against a null buffer so neither getString()
        // nor the cleanup below throws a NullPointerException.
        outText = api.GetUTF8Text();
        if (outText == null) {
            System.err.println("Tesseract returned no text for image: " + imageUrl);
            return null;
        }
        // NOTE(review): the buffer holds UTF-8 text; getString() without an
        // explicit charset presumably decodes with the platform default -
        // confirm, and pass an explicit UTF-8 charset if output is garbled.
        return outText.getString();
    } finally {
        // Destroy used objects and release native memory on every exit path
        api.End();
        if (outText != null) {
            outText.deallocate();
        }
        if (image != null) {
            pixDestroy(image);
        }
    }
}
/**
 * Demo entry point: runs OCR on a sample image with the Simplified Chinese
 * language pack and prints the result to standard output.
 *
 * @param args command-line arguments (unused)
 */
public static void main(String[] args) {
    // Sample inputs: language code, tessdata directory, image to recognize
    final String language = "chi_sim";
    final String tessdataDir = "F:\\video_process\\ocr\\tessdata-main";
    final String imagePath = "F:\\video_process\\image\\b.jpg";

    String recognized = ocr(language, tessdataDir, imagePath);
    System.out.println("OCR output:\n" + recognized);
}