对于类似以下简单的验证码的识别方案:
1、
2、
3、
4、
1、建库:切割验证码为单个字符,人工标记,比如:A。
2、识别:给定一个验证码,先将其切割为单个字符,再逐个在库中查询匹配,得到识别结果。
/***
* author:chzeze
* 识别验证码并返回
* train_path 验证码字母图库位置
* 验证码图片缓存位置:Configuration.getProperties("web_save_path")+"/captcha.jpg"
*/
public class AmGetCaptchaTest {
private static Logger logger = Logger.getLogger(AmGetCaptchaTest.class);
private static String train_path = "/data/sata/share_sata/AmazonCrawl/amazonWeb/captcha";
private static Map trainMap = null;
private static int index = 0;
private static int imgnum = 0;
private static MultiThreadedHttpConnectionManager httpConnectionManager = new MultiThreadedHttpConnectionManager();
private static HttpClient client = new HttpClient(httpConnectionManager);
/* static {
//每主机最大连接数和总共最大连接数,通过hostConfiguration设置host来区分每个主机
client.getHttpConnectionManager().getParams().setDefaultMaxConnectionsPerHost(8);
client.getHttpConnectionManager().getParams().setMaxTotalConnections(48);
client.getHttpConnectionManager().getParams().setConnectionTimeout(10000);
client.getHttpConnectionManager().getParams().setSoTimeout(10000);
client.getHttpConnectionManager().getParams()