- jar包
group: 'net.sourceforge.tess4j', name: 'tess4j', version: '4.4.1'
- 语言库(下载所需语言的 .traineddata 文件,例如 chi_sim.traineddata,放入 tessdata 目录)
https://github.com/tesseract-ocr/tessdata
- 代码示例(图片中字体很小时识别率较低)
package tess;
import java.io.File;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
/**
 * Small Tess4J wrapper that extracts text from an image file.
 *
 * <p>Recognition uses the {@code chi_sim} language data found in the
 * {@code tessdata} directory.
 */
public class Tess {

    /**
     * Directory containing the language data. This is a relative path; per the
     * original note the tessdata directory sits next to src. An absolute path
     * may be used instead.
     */
    private static final String DATA_PATH = "tessdata";

    /** Language data to load: the file name only, without its extension. */
    private static final String LANGUAGE = "chi_sim";

    /** Image processed by {@link #main(String[])} when no argument is supplied. */
    private static final String DEFAULT_IMAGE = "C:\\Users\\Administrator\\Desktop\\1.png";

    /**
     * Runs OCR on the image at {@code path} and returns the recognized text.
     *
     * <p>Failures are reported by exception rather than being returned as if
     * they were recognized text, so callers can tell the two apart.
     *
     * @param path location of the image file to recognize
     * @return the text produced by the OCR engine
     * @throws IllegalArgumentException if {@code path} is {@code null} or does
     *     not refer to an existing regular file
     * @throws IllegalStateException if the OCR engine fails; the underlying
     *     {@link TesseractException} is preserved as the cause
     */
    public static String getPicWord(String path) {
        if (path == null) {
            throw new IllegalArgumentException("path must not be null");
        }
        File imageFile = new File(path);
        // Check up front so a bad path is reported clearly, before the engine is set up.
        if (!imageFile.isFile()) {
            throw new IllegalArgumentException("Image file not found: " + path);
        }

        ITesseract instance = new Tesseract();
        instance.setDatapath(DATA_PATH);
        instance.setLanguage(LANGUAGE);
        try {
            return instance.doOCR(imageFile);
        } catch (TesseractException e) {
            throw new IllegalStateException("OCR failed for image: " + path, e);
        }
    }

    /**
     * Prints the text recognized in an image.
     *
     * @param args optional; {@code args[0]} is the image path. When absent, the
     *     built-in default image path is used.
     * @throws Exception declared for compatibility with the original signature
     */
    public static void main(String[] args) throws Exception {
        String imagePath = args.length > 0 ? args[0] : DEFAULT_IMAGE;
        System.out.println(Tess.getPicWord(imagePath));
    }
}