一、前面已经测试过在 DOS 命令行下调用 tesseract 的方式，接下来使用 Java 代码调用 tesseract 工具来识别验证码。
package com.cyn.utils;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
public class OCRUtil {
/**
 * Runs the external {@code tesseract} command on one image and returns the recognized text.
 *
 * <p>Tesseract is given an output base equal to the image path without its extension, so the
 * result is expected in a sibling {@code .txt} file. That file is read back, its lines are
 * concatenated without separators, and the file is deleted afterwards.
 *
 * <p>This method is best-effort: any failure (tesseract missing, no output produced, read
 * error, interruption) is reported on the console and yields an empty string.
 *
 * @param imgPath path of the image file to recognize
 * @return the recognized text with line breaks removed, or an empty string when
 *     {@code imgPath} is null/empty or recognition fails
 */
public static String getImgText(String imgPath) {
    if (imgPath == null || imgPath.isEmpty()) {
        return "";
    }

    // Strip the extension only when the last dot belongs to the file name itself,
    // not to a parent directory, and is not the leading dot of a hidden file.
    int dot = imgPath.lastIndexOf('.');
    int sep = Math.max(imgPath.lastIndexOf('/'), imgPath.lastIndexOf('\\'));
    String outPath = dot > sep + 1 ? imgPath.substring(0, dot) : imgPath;
    File file = new File(outPath + ".txt");

    String result = "";
    BufferedReader br = null;
    try {
        // Pass arguments as separate strings so paths containing spaces survive intact.
        // NOTE(review): "outputbase" is forwarded literally as in the original command line;
        // confirm it is really meant to be passed alongside "nobatch" and "digits".
        ProcessBuilder builder = new ProcessBuilder(
                "tesseract", imgPath, outPath, "outputbase", "nobatch", "digits");
        builder.redirectErrorStream(true);
        Process ps = builder.start();

        // Drain the child's combined stdout/stderr so it cannot block on a full pipe.
        java.io.InputStream procOut = ps.getInputStream();
        try {
            byte[] sink = new byte[1024];
            while (procOut.read(sink) != -1) {
                // output is intentionally discarded
            }
        } finally {
            procOut.close();
        }
        ps.waitFor();

        br = new BufferedReader(new FileReader(file));
        StringBuilder sb = new StringBuilder();
        String line;
        while ((line = br.readLine()) != null) {
            sb.append(line);
        }
        result = sb.toString();
    } catch (InterruptedException e) {
        // Restore the interrupt flag so callers can still observe the interruption.
        Thread.currentThread().interrupt();
        System.out.println("识别图片异常!");
        e.printStackTrace();
    } catch (Exception e) {
        // Broad catch kept on purpose: the method's contract is "never throw, return empty".
        System.out.println("识别图片异常!");
        e.printStackTrace();
    } finally {
        // br is still null when the process failed or produced no output file.
        if (br != null) {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
        // Always remove the temporary result file; a no-op when it was never created.
        file.delete();
    }
    return result;
}
/**
 * Command-line entry point: recognizes the images found in one directory.
 *
 * @param args optional; {@code args[0]} is the directory to scan. When no argument is
 *     given, the original default directory {@code E:\TestCode} is used.
 */
public static void main(String[] args) {
    String dir = (args != null && args.length > 0) ? args[0] : "E:\\TestCode";
    getImgTxtList(dir);
}
/**
 * Recognizes every image file directly inside a directory and prints each result.
 *
 * <p>Only regular files whose names end in {@code .jpg}, {@code .png} or {@code .bmp}
 * (case-insensitive) are processed; subdirectories are not descended into. Each result is
 * printed to standard output as {@code "result: <text>"}.
 *
 * @param filepath path of the directory to scan; if it is null, missing, or not a readable
 *     directory, a message is printed and nothing else happens
 */
public static void getImgTxtList(String filepath){
    if (filepath == null) {
        System.out.println("Not a readable directory: " + filepath);
        return;
    }
    File[] fileList = new File(filepath).listFiles();
    if (fileList == null) {
        // listFiles() returns null when the path is not a directory or cannot be read.
        System.out.println("Not a readable directory: " + filepath);
        return;
    }
    for (File f : fileList) {
        if (!f.isFile()) {
            continue;
        }
        String imgpath = f.getAbsolutePath();
        if (hasImageSuffix(imgpath, ".jpg")
                || hasImageSuffix(imgpath, ".png")
                || hasImageSuffix(imgpath, ".bmp")) {
            String resultTxt = getImgText(imgpath);
            System.out.println("result: "+resultTxt);
        }
    }
}

/** Returns true when {@code name} ends with {@code suffix}, ignoring case. */
private static boolean hasImageSuffix(String name, String suffix) {
    int start = name.length() - suffix.length();
    return start >= 0 && name.regionMatches(true, start, suffix, 0, suffix.length());
}
上面代码的作用是识别 E:\TestCode 文件夹下的所有图片文件（jpg、png、bmp），其原理是在 Java 代码中通过 DOS 命令调用 tesseract 工具。