OCR(光学字符识别):Tesseract-OCR 是用来识别验证码、将 PDF 转为文本(PDF to text)的开源工具,很多验证码暴力破解工具中都有它的身影。附件中的小脚本是用来测试验证码强度的工具,
如果验证码能被轻易识别,则说明验证码强度不够(需先下载安装 Tesseract-OCR,否则脚本无法运行;下面附脚本代码)。
package www;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.net.URLConnection;
import java.nio.charset.StandardCharsets;
/**
 * Command-line helper that downloads a captcha image and runs the external
 * Tesseract OCR program on it, printing whatever text was recognised.
 *
 * <p>The image is stored at {@code d:/test.png} and Tesseract is asked to
 * write its result next to it as {@code d:/test.txt}; both files are
 * overwritten on every call.
 */
public class Captcha_code {

    /** Where the downloaded captcha image is stored. */
    private static final String IMAGE_PATH = "d:/test.png";

    /** Output base name handed to Tesseract; the result is read from this name plus ".txt". */
    private static final String OCR_OUTPUT_BASE = "d:/test";

    /** Text file that is read back after Tesseract has finished. */
    private static final String OCR_OUTPUT_PATH = OCR_OUTPUT_BASE + ".txt";

    /** Browser-like User-Agent sent with the image request. */
    private static final String USER_AGENT =
            "Mozilla/5.0 (Windows NT 6.1; rv:38.0) Gecko/20100101 Firefox/38.0";

    /**
     * Entry point: expects the captcha URL as the first argument and prints
     * the recognised text to standard output.
     *
     * @param args command-line arguments; {@code args[0]} is the captcha URL
     * @throws IOException if downloading, running Tesseract or reading its output fails
     */
    public static void main(String[] args) throws IOException {
        if (args.length == 0) {
            System.out.println("Usage:java -jar Captcha_code.jar http://www.xx.com/code.jsp");
            System.exit(0);
        } else {
            String code = Captcha(args[0]);
            System.out.println(code);
        }
    }

    /**
     * Downloads the image at {@code captcha_url}, runs Tesseract on it and
     * returns the recognised text.
     *
     * @param captcha_url URL of the captcha image
     * @return all lines of Tesseract's output concatenated without separators,
     *         or an empty string if the calling thread was interrupted while
     *         waiting for Tesseract (the interrupt flag is restored)
     * @throws IOException if the URL is malformed, the download fails, Tesseract
     *         cannot be started or exits with a non-zero status, or its output
     *         file cannot be read
     */
    public static String Captcha(String captcha_url) throws IOException {
        downloadImage(captcha_url);
        if (!runTesseract()) {
            return "";
        }
        return readOcrOutput();
    }

    /** Fetches the captcha image and stores it at {@link #IMAGE_PATH}. */
    private static void downloadImage(String captchaUrl) throws IOException {
        URLConnection connection = new URL(captchaUrl).openConnection();
        connection.setRequestProperty("User-Agent", USER_AGENT);
        // try-with-resources closes both streams even when the copy fails half-way.
        // FileOutputStream creates the file itself, so no createNewFile() is needed.
        try (InputStream in = connection.getInputStream();
                FileOutputStream out = new FileOutputStream(IMAGE_PATH)) {
            byte[] buffer = new byte[8192];
            int read;
            while ((read = in.read(buffer)) != -1) {
                out.write(buffer, 0, read);
            }
        }
    }

    /**
     * Runs Tesseract on the downloaded image.
     *
     * @return {@code true} if Tesseract finished successfully, {@code false}
     *         if the calling thread was interrupted while waiting for it
     */
    private static boolean runTesseract() throws IOException {
        // Explicit argument list: no whitespace tokenising of a command string.
        ProcessBuilder builder = new ProcessBuilder(
                "tesseract.exe", IMAGE_PATH, OCR_OUTPUT_BASE, "-l", "eng");
        builder.redirectErrorStream(true);
        Process process = builder.start();
        try {
            process.getOutputStream().close();
            // Drain the child's output; an unread pipe can fill up and block
            // the child, which would make waitFor() hang forever.
            try (InputStream childOutput = process.getInputStream()) {
                byte[] sink = new byte[1024];
                while (childOutput.read(sink) != -1) {
                    // discard
                }
            }
            int exitCode = process.waitFor();
            if (exitCode != 0) {
                // Without this check a stale result file from an earlier run
                // could be returned as if it belonged to this image.
                throw new IOException("tesseract exited with status " + exitCode);
            }
            return true;
        } catch (InterruptedException e) {
            process.destroy();
            // Preserve the interrupt for the caller instead of swallowing it.
            Thread.currentThread().interrupt();
            return false;
        }
    }

    /** Reads Tesseract's text output and joins its lines without separators. */
    private static String readOcrOutput() throws IOException {
        StringBuilder text = new StringBuilder();
        // Tesseract's text output is UTF-8; do not depend on the platform charset.
        try (BufferedReader reader = new BufferedReader(new InputStreamReader(
                new FileInputStream(OCR_OUTPUT_PATH), StandardCharsets.UTF_8))) {
            String line;
            while ((line = reader.readLine()) != null) {
                text.append(line);
            }
        }
        return text.toString();
    }
}