需要说明的是,本文介绍的方法在识别简单图片中的数字时,正确率非常高;但对于字母、甚至汉字这类更复杂的内容,正确率就无法保证了。如果图片背景十分混乱,连肉眼都要仔细分辨,那么为了不浪费您的宝贵时间,就不必往下看了。
需要引入的jar包:
<!--图片识别-->
<dependency>
<groupId>net.java.dev.jna</groupId>
<artifactId>jna</artifactId>
<version>4.1.0</version>
</dependency>
<dependency>
<groupId>net.sourceforge.tess4j</groupId>
<artifactId>tess4j</artifactId>
<version>2.0.1</version>
<exclusions>
<exclusion>
<groupId>com.sun.jna</groupId>
<artifactId>jna</artifactId>
</exclusion>
</exclusions>
</dependency>
代码:
import com.camel.common.utils.HttpClient;
import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;
import net.sourceforge.tess4j.util.LoadLibs;
import org.apache.commons.io.IOUtils;
import org.apache.http.HttpResponse;
import java.io.*;
/**
 * Demo class: downloads a captcha image over HTTP and reads its text with
 * Tesseract OCR through the tess4j wrapper.
 */
public class TestOne {

    /**
     * Downloads the captcha image to {@code 1.jpg}, runs OCR on it and prints the result.
     *
     * @param args unused
     * @throws Exception if the HTTP request or the download fails
     */
    public static void main(String[] args) throws Exception {
        // "验证码url" is a placeholder for the real captcha endpoint; the timestamp
        // is sent as the request parameter "time".
        downloadJPG(HttpClient.post("验证码url", "time=" + System.currentTimeMillis()), "1.jpg");
        String code = getImgContent("1.jpg");
        System.out.println("验证码 = " + code);
    }

    /**
     * Writes the body of an HTTP response to a local file.
     *
     * <p>Both the response stream and the file stream are closed when this method
     * returns, whether or not the copy succeeds.
     *
     * @param httpResponse response whose entity body holds the image bytes
     * @param fileName     path of the file to create (or overwrite)
     * @throws IOException if the response carries no entity, or if reading the
     *                     response or writing the file fails
     */
    protected static void downloadJPG(HttpResponse httpResponse, String fileName) throws IOException {
        // A response without a body would otherwise fail with a bare NullPointerException.
        if (httpResponse.getEntity() == null) {
            throw new IOException("HTTP response has no entity to save to " + fileName);
        }
        try (InputStream input = httpResponse.getEntity().getContent();
             OutputStream output = new FileOutputStream(new File(fileName))) {
            IOUtils.copy(input, output);
            // Flush while the stream is still open; try-with-resources closes it afterwards.
            output.flush();
        }
    }

    /**
     * Runs OCR on an image file and returns the recognized text with line feeds removed.
     *
     * <p>Recognition failures are treated as best-effort: the error message is written
     * to {@code System.err} and an empty string is returned.
     *
     * @param imgUrl path of the image file on the local file system
     * @return the recognized text without {@code '\n'} characters, or {@code ""} if OCR failed
     */
    protected static String getImgContent(String imgUrl) {
        String content = "";
        File imageFile = new File(imgUrl);
        // Read the digits from the image.
        ITesseract instance = new Tesseract();
        // Extract the "tessdata" resources via tess4j's LoadLibs and use that folder as the data path.
        File tessDataFolder = LoadLibs.extractTessResources("tessdata");
        instance.setLanguage("eng"); // the English data set recognizes digits fairly accurately
        instance.setDatapath(tessDataFolder.getAbsolutePath());
        try {
            content = instance.doOCR(imageFile).replace("\n", "");
            System.out.println(content);
        } catch (TesseractException e) {
            System.err.println(e.getMessage());
        }
        return content;
    }
}