Tesseract-OCR安装
Tesseract-OCR下载地址:https://github.com/UB-Mannheim/tesseract/wiki,下载tesseract安装包
安装tesseract,双击进行运行安装。全部点next就行。自己选择安装的位置
配置tesseract环境变量
系统环境变量–>path,添加Tesseract-OCR的路径位置
系统变量–>新建
变量名:TESSDATA_PREFIX
变量值: D:\java工具\Tesseract-ocr\tessdata (这是我的安装位置,请替换为你自己的Tesseract安装目录下的tessdata文件夹)
这一步是添加Tesseract-OCR的语言库,语言库地址:https://github.com/tesseract-ocr/tessdata。你还可以根据实际情况训练自己的语言库
Tesseract-OCR与java代码结合。先截屏,再识别截图中的文字信息。如果不需要截屏,可以直接换成已有图片的地址。
import javax.imageio.ImageIO;
import java.awt.*;
import java.awt.image.BufferedImage;
import java.io.File;
import java.io.IOException;
/**
 * Demo entry point: captures the whole screen to a PNG file and then runs
 * Tesseract OCR on that file through {@code test01.recognizeText}.
 */
public class xxx {
    /**
     * Location of the screenshot that is written by {@link #jp()} and read by
     * {@link #main(String[])}. Kept in one place so the two cannot drift apart.
     */
    private static final String IMAGE_PATH = "D:\\java工具\\Tesseract-ocr\\tessdata\\图片\\test.png";

    /**
     * Takes a screenshot, recognizes the text in it and prints the result.
     * Any failure is reported on stderr via its stack trace.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            jp();
            // Image file to be recognized (the screenshot written by jp()).
            File file = new File(IMAGE_PATH);
            System.out.println("begin.........");
            String recognizeText1 = new test01().recognizeText(file);
            System.out.print(recognizeText1 + "\t");
        } catch (Exception e) {
            // recognizeText declares Exception, so a single handler covers
            // I/O failures as well as everything else.
            e.printStackTrace();
        }
    }

    /**
     * Captures the full screen and stores it as a PNG at {@link #IMAGE_PATH}.
     *
     * @throws RuntimeException wrapping the underlying cause if the capture
     *         fails or the image cannot be written
     */
    public static void jp() {
        Dimension screenSize = Toolkit.getDefaultToolkit().getScreenSize();
        // Rectangle covering the whole screen.
        Rectangle rect = new Rectangle(0, 0, screenSize.width, screenSize.height);
        try {
            BufferedImage bufImage = new Robot().createScreenCapture(rect);
            File target = new File(IMAGE_PATH);
            // ImageIO.write does not create missing directories.
            File parent = target.getParentFile();
            if (parent != null && !parent.isDirectory() && !parent.mkdirs()) {
                throw new IOException("Cannot create directory " + parent);
            }
            // ImageIO.write returns false when no suitable writer is found;
            // treat that as a failure instead of continuing with a stale file.
            if (!ImageIO.write(bufImage, "PNG", target)) {
                throw new IOException("No image writer available for " + target);
            }
            System.out.println("截屏成功");
        } catch (Exception e) {
            throw new RuntimeException(e);
        }
    }
}
调用下面test01类的recognizeText方法,读取识别结果
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.util.ArrayList;
import java.util.List;
public class test01 {
private final String LANG_OPTION = "-l";
private final String EOL = System.getProperty("line.separator");
/**
* Tesseract-OCR的安装路径
*/
private String tessPath = "D://java工具//Tesseract-ocr";
/**
* @param imageFile 传入的图像文
* @return 识别后的字符串
*/
public String recognizeText(File imageFile) throws Exception {
/**
* 设置输出文件的保存的文件目录,创建output文件夹
*/
File outputFile = new File(imageFile.getParentFile(), "output");
StringBuffer strB = new StringBuffer();
List<String> cmd = new ArrayList<String>();
cmd.add(tessPath + "\\tesseract");
cmd.add("");
cmd.add(outputFile+"\\"+outputFile.getName());
cmd.add(LANG_OPTION);
// cmd.add("chi_sim");// 切换中文识别语言或者你自己训练的语言
cmd.add("eng");
ProcessBuilder pb = new ProcessBuilder();
pb.directory(new File(tessPath));
cmd.set(1, imageFile.getParentFile()+"\\"+imageFile.getName());
pb.command(cmd);
pb.redirectErrorStream(true);
long startTime = System.currentTimeMillis();
System.out.println("开始时间:" + startTime);
Process process = pb.start();/**/
int w = process.waitFor();
if (w == 0)// 0代表正常退出
{
BufferedReader in = new BufferedReader(new InputStreamReader(
new FileInputStream(outputFile+"\\"+outputFile.getName()+".txt"),
"UTF-8"));
String str;
while ((str = in.readLine()) != null) {
strB.append(str).append(EOL);
}
in.close();
long endTime = System.currentTimeMillis();
System.out.println("结束时间:" + endTime);
System.out.println("耗时:" + (endTime - startTime) + "毫秒");
} else {
String msg;
switch (w) {
case 1:
msg = "Errors accessing files. There may be spaces in your image's filename.";
break;
case 29:
msg = "Cannot recognize the image or its selected region.";
break;
case 31:
msg = "Unsupported image format.";
break;
default:
msg = "Errors occurred.";
}
throw new RuntimeException(msg);
}
// 删除生成的文件
new File(outputFile+"\\"+outputFile.getName()+".txt").delete();
return strB.toString().replaceAll("\\s*", "");
}
}
结果: