java图片文字识别_java 图片文字识别 ocr

package org.ink.image.textrz;

import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import org.jdesktop.swingx.util.OS;

/**
 * TEXT Recognize Utils
 *
 * @author ink.Flower
 */

public classOCRUtil {private final String LANG_OPTION = "-l"; //英文字母小写l,并非数字1

private final String EOL = System.getProperty("line.separator");private String tessPath = "C://Program Files (x86)//Tesseract-OCR";//ocr默认安装路径

private String transname="chi_sim";//默认中文语言包,识别中文/*** Construct method of OCR ,set Tesseract-OCR install path

*@paramtessPath Tesseract-OCR install path

*@paramtransFileName traningFile name like eng.traineddata*/

publicOCRUtil(String tessPath,String transFileName){this.tessPath=tessPath;this.transname=transFileName;

}/*** Construct method of OCR,default path is "C://Program Files (x86)//Tesseract-OCR"*/

publicOCRUtil(){ }publicString getTessPath() {returntessPath;

}public voidsetTessPath(String tessPath) {this.tessPath =tessPath;

}publicString getTransname() {returntransname;

}public voidsetTransname(String transname) {this.transname =transname;

}publicString getLANG_OPTION() {returnLANG_OPTION;

}publicString getEOL() {returnEOL;

}/*** recognize text in image

*@paramimageFile

*@paramimageFormat

*@returntext recognized in image

*@throwsException*/

public String recognizeText(File imageFile,String imageFormat)throwsException{

File tempImage= newImageIOHelper().createImage(imageFile,imageFormat);returnocrImages(tempImage, imageFile);

}/*** recognize text in image

*@paramimageFile

*@paramimageFormat

*@paramlocale

*@returntext recognized in image

*@throwsException*/

public String recognizeText(File imageFile,String imageFormat,Locale locale)throwsException{

File tempImage= newImageIOHelper(locale).createImage(imageFile,imageFormat);returnocrImages(tempImage, imageFile);

}/***

*@paramtempImage

*@paramimageFile

*@return*@throwsIOException

*@throwsInterruptedException*/

private String ocrImages(File tempImage,File imageFile) throwsIOException, InterruptedException{

File outputFile= new File(imageFile.getParentFile(),"output");

Runtime.getRuntime().exec("attrib "+"\""+outputFile.getAbsolutePath()+"\""+" +H"); //设置文件隐藏

StringBuffer strB = newStringBuffer();

List cmd = new ArrayList();if(OS.isWindowsXP()){

cmd.add(tessPath+"//tesseract");

}else if(OS.isLinux()){

cmd.add("tesseract");

}else{

cmd.add(tessPath+"//tesseract");

}

cmd.add("");

cmd.add(outputFile.getName());

cmd.add(LANG_OPTION);

cmd.add(transname);

ProcessBuilder pb= newProcessBuilder();

pb.directory(imageFile.getParentFile());

cmd.set(1, tempImage.getName());

pb.command(cmd);

pb.redirectErrorStream(true);

Process process=pb.start();int w =process.waitFor();

tempImage.delete();//删除临时正在工作文件

if(w==0){

BufferedReader in= new BufferedReader(new InputStreamReader(new FileInputStream(outputFile.getAbsolutePath()+".txt"),"UTF-8"));

String str;while((str = in.readLine())!=null){

strB.append(str).append(EOL);

}

in.close();

}else{

String msg;switch(w){case 1:

msg= "Errors accessing files.There may be spaces in your image's filename.";break;case 29:

msg= "Cannot recongnize the image or its selected region.";break;case 31:

msg= "Unsupported image format.";break;default:

msg= "Errors occurred.";

}

tempImage.delete();throw newRuntimeException(msg);

}new File(outputFile.getAbsolutePath()+".txt").delete();returnstrB.toString();

}

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值