java图片文字识别_java 图片文字识别 ocr

package org.ink.image.textrz;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import org.jdesktop.swingx.util.OS;

/**
 * TEXT Recognize Utils

 * @author ink.Flower
 */

public classOCRUtil {private final String LANG_OPTION = "-l"; //英文字母小写l,并非数字1

private final String EOL = System.getProperty("line.separator");private String tessPath = "C://Program Files (x86)//Tesseract-OCR";//ocr默认安装路径

private String transname="chi_sim";//默认中文语言包,识别中文/*** Construct method of OCR ,set Tesseract-OCR install path

*@paramtessPath Tesseract-OCR install path

*@paramtransFileName traningFile name like eng.traineddata*/

publicOCRUtil(String tessPath,String transFileName){this.tessPath=tessPath;this.transname=transFileName;

}/*** Construct method of OCR,default path is "C://Program Files (x86)//Tesseract-OCR"*/

publicOCRUtil(){ }publicString getTessPath() {returntessPath;

}public voidsetTessPath(String tessPath) {this.tessPath =tessPath;

}publicString getTransname() {returntransname;

}public voidsetTransname(String transname) {this.transname =transname;

}publicString getLANG_OPTION() {returnLANG_OPTION;

}publicString getEOL() {returnEOL;

}/*** recognize text in image

*@paramimageFile

*@paramimageFormat

*@returntext recognized in image

*@throwsException*/

public String recognizeText(File imageFile,String imageFormat)throwsException{

File tempImage= newImageIOHelper().createImage(imageFile,imageFormat);returnocrImages(tempImage, imageFile);

}/*** recognize text in image

*@paramimageFile

*@paramimageFormat

*@paramlocale

*@returntext recognized in image

*@throwsException*/

public String recognizeText(File imageFile,String imageFormat,Locale locale)throwsException{

File tempImage= newImageIOHelper(locale).createImage(imageFile,imageFormat);returnocrImages(tempImage, imageFile);

}/***

*@paramtempImage

*@paramimageFile

*@return*@throwsIOException

*@throwsInterruptedException*/

private String ocrImages(File tempImage,File imageFile) throwsIOException, InterruptedException{

File outputFile= new File(imageFile.getParentFile(),"output");

Runtime.getRuntime().exec("attrib "+"\""+outputFile.getAbsolutePath()+"\""+" +H"); //设置文件隐藏

StringBuffer strB = newStringBuffer();

List cmd = new ArrayList();if(OS.isWindowsXP()){

cmd.add(tessPath+"//tesseract");

}else if(OS.isLinux()){

cmd.add("tesseract");

}else{

cmd.add(tessPath+"//tesseract");

}

cmd.add("");

cmd.add(outputFile.getName());

cmd.add(LANG_OPTION);

cmd.add(transname);

ProcessBuilder pb= newProcessBuilder();

pb.directory(imageFile.getParentFile());

cmd.set(1, tempImage.getName());

pb.command(cmd);

pb.redirectErrorStream(true);

Process process=pb.start();int w =process.waitFor();

tempImage.delete();//删除临时正在工作文件

if(w==0){

BufferedReader in= new BufferedReader(new InputStreamReader(new FileInputStream(outputFile.getAbsolutePath()+".txt"),"UTF-8"));

String str;while((str = in.readLine())!=null){

strB.append(str).append(EOL);

}

in.close();

}else{

String msg;switch(w){case 1:

msg= "Errors accessing files.There may be spaces in your image's filename.";break;case 29:

msg= "Cannot recongnize the image or its selected region.";break;case 31:

msg= "Unsupported image format.";break;default:

msg= "Errors occurred.";

}

tempImage.delete();throw newRuntimeException(msg);

}new File(outputFile.getAbsolutePath()+".txt").delete();returnstrB.toString();

}

}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论
以下是使用 Tesseract OCR 库进行图片文字识别的 Java 代码:

```java
import java.io.File;
import java.io.IOException;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;

public class OCR {
    public static void main(String[] args) {
        File imageFile = new File("image.png");
        Tesseract tesseract = new Tesseract();
        try {
            String result = tesseract.doOCR(imageFile);
            System.out.println(result);
        } catch (TesseractException e) {
            System.err.println(e.getMessage());
        }
    }
}
```

在这个示例中,我们使用了 Tesseract OCR 库来识别名为 `image.png` 的图像文件中的文本。请注意,要使用 Tesseract OCR 库,您需要将其添加到项目的类路径中。

您还需要将以下依赖项添加到您的项目中:

```xml
<dependency>
    <groupId>net.sourceforge.tess4j</groupId>
    <artifactId>tess4j</artifactId>
    <version>4.5.1</version>
</dependency>
<dependency>
    <groupId>com.github.jai-imageio</groupId>
    <artifactId>jai-imageio-core</artifactId>
    <version>1.4.0</version>
</dependency>
<dependency>
    <groupId>com.github.jai-imageio</groupId>
    <artifactId>jai-imageio-jpeg2000</artifactId>
    <version>1.3.0</version>
</dependency>
<dependency>
    <groupId>com.github.jai-imageio</groupId>
    <artifactId>jai-imageio-metadata</artifactId>
    <version>1.3.0</version>
</dependency>
<dependency>
    <groupId>com.github.jai-imageio</groupId>
    <artifactId>jai-imageio-impl</artifactId>
    <version>1.3.0</version>
</dependency>
```

请注意,这些依赖项可能因版本而异。最好查看 Tesseract OCR 库的文档以获取正确的依赖项。
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值