首先需要下载训练库
代码中 setDatapath 的参数需要设置为训练库文件所在目录的路径
训练库文件网址
package TestOCR;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import java.awt.*;
import java.io.BufferedReader;
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Scanner;
import static org.apache.pdfbox.cos.COSName.AA;
/**
 * Interactive console demo that runs Tess4J OCR on fixed regions of an image.
 *
 * <p>The program repeatedly prompts for an image file path, runs OCR on each
 * rectangle in {@link #REGIONS} and prints the recognized text. Typing
 * {@code exit} (or closing standard input) ends the program.
 */
public class TestTess4j {

    /**
     * Image regions (x, y, width, height in pixels) that are recognized
     * separately for every input file, in this order.
     */
    private static final List<Rectangle> REGIONS = Arrays.asList(
            new Rectangle(0, 0, 704, 40),
            new Rectangle(0, 41, 704, 30),
            new Rectangle(0, 70, 704, 29));

    /**
     * Entry point: configures the OCR engine and runs the prompt loop.
     *
     * @param args ignored
     * @throws IOException declared for interface compatibility with the original signature
     */
    public static void main(String[] args) throws IOException {
        ITesseract in = new Tesseract();
        in.setDatapath("G:\\lan\\1"); // directory that contains the trained-data files
        in.setLanguage("chi_sim");    // recognize Simplified Chinese instead of the default language

        // try-with-resources closes the scanner when the loop ends.
        try (Scanner scanner = new Scanner(System.in)) {
            while (true) {
                System.out.println("请输入" + ":exit 或者 文件路径");

                // Stop cleanly at end of input instead of failing in nextLine().
                if (!scanner.hasNextLine()) {
                    break;
                }

                // The typed path is used as-is: backslash escaping is only needed in
                // Java source literals, never for strings read at runtime.
                String line = scanner.nextLine().trim();
                if ("exit".equals(line)) {
                    break;
                }
                if (line.isEmpty()) {
                    continue;
                }

                File image = new File(line);
                if (!image.isFile()) {
                    System.err.println("文件不存在: " + line);
                    continue;
                }

                recognizeRegions(in, image, line);
            }
        }
    }

    /**
     * Runs OCR on every region of one image and prints each result.
     * A failure on any region aborts the remaining regions of that image.
     */
    private static void recognizeRegions(ITesseract engine, File image, String label) {
        try {
            for (Rectangle region : REGIONS) {
                String result = engine.doOCR(image, region);
                System.out.println(label + "==resule===========:" + result);
            }
        } catch (TesseractException e) {
            // Keep the session alive; report the failure and wait for the next path.
            e.printStackTrace();
        }
    }
}
测试图片
运行结果图