话不多说直接上代码
中文语言库可以自行搜索，也可以从 GitHub 上下载（可选资源很多），所需文件为：
chi_sim.traineddata
package com.doing.utils;
import lombok.extern.slf4j.Slf4j;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import java.io.BufferedOutputStream;
import java.io.File;
import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Local OCR helper built on Tess4J.
 *
 * <p>Writes an image to a temporary file, runs Tesseract on it with the simplified-Chinese
 * model, strips all whitespace from the recognized text and extracts a fixed-length code
 * that starts with a known prefix.
 *
 * @author Mr伍
 */
@Slf4j
public class OcrLocalTess4j {

    /** Matches every run of whitespace (spaces, tabs, CR, LF) in the OCR output. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Tesseract language model: {@code chi_sim} is simplified Chinese, {@code eng} is English. */
    private static final String OCR_LANGUAGE = "chi_sim";

    /** Prefix that marks the start of the code to extract (adjust to the documents at hand). */
    private static final String CODE_PREFIX = "5000";

    /** Total length of the extracted code, prefix included. */
    private static final int CODE_LENGTH = 11;

    /**
     * Runs OCR on an in-memory image and returns the code found in the recognized text.
     *
     * <p>The image is written to a uniquely named temporary file under the directory configured
     * as {@code tempfilejpgPath}; the file is removed again before the method returns. The
     * Tesseract data directory is read from the {@code languagePath} configuration key.
     *
     * @param bytes raw bytes of the image file; must not be {@code null} or empty
     * @return the {@value #CODE_LENGTH}-character code beginning with {@value #CODE_PREFIX},
     *     taken from the recognized text after all whitespace has been removed
     * @throws IllegalArgumentException if {@code bytes} is {@code null} or empty
     * @throws IllegalStateException if Tesseract fails, or the recognized text does not contain
     *     a complete code
     * @throws IOException if the temporary directory or image file cannot be created or written
     */
    public static String dowl_Ocr(byte[] bytes) throws RuntimeException, IOException {
        if (bytes == null || bytes.length == 0) {
            throw new IllegalArgumentException("image bytes must not be null or empty");
        }

        // Directory that receives the temporary image file.
        String filePath = DefaultConfig.getString("tempfilejpgPath");
        // Directory that holds the *.traineddata language files.
        String xlLib = DefaultConfig.getString("languagePath");

        File dir = new File(filePath);
        // Re-check isDirectory() after a failed mkdirs(): another thread may have created it.
        if (!dir.isDirectory() && !dir.mkdirs() && !dir.isDirectory()) {
            throw new IOException("Cannot create temp image directory: " + dir);
        }

        // A unique name per call keeps concurrent invocations from overwriting each other's
        // image, which a single shared file name would allow.
        File file = File.createTempFile("ocr-", ".jpg", dir);
        try {
            writeImage(file, bytes);
            String text = WHITESPACE.matcher(recognize(file, xlLib)).replaceAll("");
            log.info("result:{}", text);
            String code = extractCode(text);
            log.info("识别出的数字{}", code);
            return code;
        } finally {
            // Clean-up only: nothing here returns or throws, so a failure above still propagates.
            if (!file.delete()) {
                log.warn("Could not delete temporary OCR image {}", file);
            }
        }
    }

    /** Writes the image bytes to {@code target}, closing the stream even when the write fails. */
    private static void writeImage(File target, byte[] data) throws IOException {
        try (BufferedOutputStream out =
                new BufferedOutputStream(new java.io.FileOutputStream(target))) {
            out.write(data);
        }
    }

    /** Runs Tesseract on {@code image}; an OCR failure is rethrown unchecked with its cause. */
    private static String recognize(File image, String dataPath) {
        ITesseract instance = new Tesseract();
        // Location of the trained language data.
        instance.setDatapath(dataPath);
        instance.setLanguage(OCR_LANGUAGE);
        try {
            // doOCR also has a BufferedImage overload besides the File one used here.
            String result = instance.doOCR(image);
            return result == null ? "" : result;
        } catch (TesseractException e) {
            throw new IllegalStateException("OCR failed for " + image, e);
        }
    }

    /** Returns the fixed-length code starting at the first occurrence of the prefix. */
    private static String extractCode(String text) {
        int start = text.indexOf(CODE_PREFIX);
        if (start < 0) {
            throw new IllegalStateException(
                    "Prefix '" + CODE_PREFIX + "' not found in OCR text: " + text);
        }
        int end = start + CODE_LENGTH;
        if (end > text.length()) {
            throw new IllegalStateException(
                    "OCR text too short for a " + CODE_LENGTH + "-character code: " + text);
        }
        return text.substring(start, end);
    }
}
<dependency>
<groupId>net.sourceforge.tess4j</groupId>
<artifactId>tess4j</artifactId>
<version>4.4.0</version>
<!-- Exclusions: keep tess4j from pulling in log4j-over-slf4j and logback-classic transitively (presumably to avoid clashing with the project's own logging setup; confirm against the project's logging dependencies) -->
<exclusions>
<exclusion>
<artifactId>log4j-over-slf4j</artifactId>
<groupId>org.slf4j</groupId>
</exclusion>
<exclusion>
<artifactId>logback-classic</artifactId>
<groupId>ch.qos.logback</groupId>
</exclusion>
</exclusions>
</dependency>