packageorg.ink.image.textrz;importjava.io.BufferedReader;importjava.io.File;importjava.io.FileInputStream;importjava.io.IOException;importjava.io.InputStreamReader;importjava.util.ArrayList;importjava.util.List;importjava.util.Locale;importorg.jdesktop.swingx.util.OS;/*** TEXT Recognize Utils
 * @author ink.Flower
 */
public classOCRUtil {private final String LANG_OPTION = "-l"; //英文字母小写l,并非数字1
private final String EOL = System.getProperty("line.separator");private String tessPath = "C://Program Files (x86)//Tesseract-OCR";//ocr默认安装路径
private String transname="chi_sim";//默认中文语言包,识别中文/*** Construct method of OCR ,set Tesseract-OCR install path
*@paramtessPath Tesseract-OCR install path
*@paramtransFileName traningFile name like eng.traineddata*/
publicOCRUtil(String tessPath,String transFileName){this.tessPath=tessPath;this.transname=transFileName;
}/*** Construct method of OCR,default path is "C://Program Files (x86)//Tesseract-OCR"*/
publicOCRUtil(){ }publicString getTessPath() {returntessPath;
}public voidsetTessPath(String tessPath) {this.tessPath =tessPath;
}publicString getTransname() {returntransname;
}public voidsetTransname(String transname) {this.transname =transname;
}publicString getLANG_OPTION() {returnLANG_OPTION;
}publicString getEOL() {returnEOL;
}/*** recognize text in image
*@paramimageFile
*@paramimageFormat
*@returntext recognized in image
*@throwsException*/
public String recognizeText(File imageFile,String imageFormat)throwsException{
File tempImage= newImageIOHelper().createImage(imageFile,imageFormat);returnocrImages(tempImage, imageFile);
}/*** recognize text in image
*@paramimageFile
*@paramimageFormat
*@paramlocale
*@returntext recognized in image
*@throwsException*/
public String recognizeText(File imageFile,String imageFormat,Locale locale)throwsException{
File tempImage= newImageIOHelper(locale).createImage(imageFile,imageFormat);returnocrImages(tempImage, imageFile);
}/***
*@paramtempImage
*@paramimageFile
*@return*@throwsIOException
*@throwsInterruptedException*/
private String ocrImages(File tempImage,File imageFile) throwsIOException, InterruptedException{
File outputFile= new File(imageFile.getParentFile(),"output");
Runtime.getRuntime().exec("attrib "+"\""+outputFile.getAbsolutePath()+"\""+" +H"); //设置文件隐藏
StringBuffer strB = newStringBuffer();
List cmd = new ArrayList();if(OS.isWindowsXP()){
cmd.add(tessPath+"//tesseract");
}else if(OS.isLinux()){
cmd.add("tesseract");
}else{
cmd.add(tessPath+"//tesseract");
}
cmd.add("");
cmd.add(outputFile.getName());
cmd.add(LANG_OPTION);
cmd.add(transname);
ProcessBuilder pb= newProcessBuilder();
pb.directory(imageFile.getParentFile());
cmd.set(1, tempImage.getName());
pb.command(cmd);
pb.redirectErrorStream(true);
Process process=pb.start();int w =process.waitFor();
tempImage.delete();//删除临时正在工作文件
if(w==0){
BufferedReader in= new BufferedReader(new InputStreamReader(new FileInputStream(outputFile.getAbsolutePath()+".txt"),"UTF-8"));
String str;while((str = in.readLine())!=null){
strB.append(str).append(EOL);
}
in.close();
}else{
String msg;switch(w){case 1:
msg= "Errors accessing files.There may be spaces in your image's filename.";break;case 29:
msg= "Cannot recongnize the image or its selected region.";break;case 31:
msg= "Unsupported image format.";break;default:
msg= "Errors occurred.";
}
tempImage.delete();throw newRuntimeException(msg);
}new File(outputFile.getAbsolutePath()+".txt").delete();returnstrB.toString();
}
}