版权声明:本文为博主原创文章,未经博主允许不得转载。如需转载请声明:【转自 http://blog.csdn.net/xiaoxian8023 】
这个HttpClientUtil工具类分享在GitHub上已经半年多的时间了,并且得到了不小的关注,有25颗star,被fork了38次。有了大家的鼓励,工具类也一直在完善中。最近比较忙,两个多月前开始的修改直到今天才测试完成,现在再次分享给大家。
验证码识别这项技术并不是本工具类自身的功能,而是通过一个开源的api来识别验证码。这里做了一个简单的封装,主要用来解决登录时的验证码问题。在线验证码识别官网:http://lab.ocrking.com/,github地址:https://github.com/AvensLab/OcrKing/,是一个功能非常强大的工具。
好了,言归正传,本次封装的工具类的主要代码如下:
- /**
- * 识别验证码
- *
- * @author arron
- * @date 2016年3月24日 上午9:44:35
- * @version 1.0
- */
- public class OCR {
- /**
- * 接口说明:
- * https://github.com/AvensLab/OcrKing/blob/master/线上识别http接口说明.txt
- */
- private static final String apiUrl = "http://lab.ocrking.com/ok.html";
- private static final String apiKey = PropertiesUtil.getProperty("OCR.key");
- private static final String boundary = "----------------------------OcrKing_Client_Aven_s_Lab";
- private static final String end="\r\n--" + boundary + "--\r\n";
- private static final Header[] headers = HttpHeader.custom() .referer("http://lab.ocrking.com/?javaclient0.1)")
- .build();
- private static final Map<String, Object> map = getParaMap();
- private static HttpClient client =null; //=HCB.custom().proxy("127.0.0.1", 8888).build();
- public static void debug(){
- client =HCB.custom().proxy("127.0.0.1", 8888).build();
- }
- public static void exitDebug(){
- client =null;
- }
- //获取固定参数
- private static Map<String, Object> getParaMap(){
- //加载所有参数
- Map<String , Object> map = new HashMap<String, Object>();
- map.put("service", "OcrKingForCaptcha");
- map.put("language", "eng");
- map.put("charset", "7");//7-数字大写小写,5-数字大写字母
- map.put("type", "http://www.unknown.com");
- map.put("apiKey", apiKey);
- return map;
- }
- /**
- * 识别本地校验码(英文:字母+大小写)
- *
- * @param imgFilePath 验证码地址
- * @return
- */
- public static String ocrCode(String filePath){
- return ocrCode(filePath, 0);
- }
- /**
- * 识别本地校验码(英文:字母+大小写)
- *
- * @param imgFilePath 验证码地址
- * @param limitCodeLen 验证码长度(如果结果与设定长度不一致,则返回获取失败的提示)
- * @return
- */
- @SuppressWarnings("resource")
- public static String ocrCode(String imgFilePath, int limitCodeLen){
- byte[] data = null;
- String fileName = imgFilePath.replaceAll("[^/]*/|[^\\\\]*\\\\", "");
- StringBuffer strBuf = new StringBuffer();
- for (Entry<String, Object> entry : map.entrySet()) {
- strBuf.append("\r\n").append("--").append(boundary).append("\r\n");
- strBuf.append("Content-Disposition: form-data; name=\"" + entry.getKey() + "\"\r\n\r\n");
- strBuf.append(entry.getValue());
- }
- strBuf.append("\r\n").append("--").append(boundary).append("\r\n");
- strBuf.append("Content-Disposition: form-data; name=\"ocrfile\"; filename=\"" + fileName + "\"\r\n");
- strBuf.append("Content-Type:application/octet-stream\r\n\r\n");
- //读取文件
- File f = new File(imgFilePath);
- if(!f.exists()){
- return "Error:文件不存在!";
- }
- //内容长度=参数长度+文件长度+结尾字符串长度
- ByteArrayOutputStream bos = new ByteArrayOutputStream(strBuf.length()+(int)f.length()+end.length());
- try {
- bos.write(strBuf.toString().getBytes());//转化参数内容
- BufferedInputStream in = new BufferedInputStream(new FileInputStream(f));
- int buf_size = 1024;
- int len = 0;
- byte[] buf = new byte[buf_size];
- while (-1 != (len = in.read(buf, 0, buf_size))) {
- bos.write(buf, 0, len);
- }
- bos.write(end.getBytes());
- data= bos.toByteArray();
- } catch (IOException e) {
- e.printStackTrace();
- }
- Map<String , Object> m = new HashMap<String, Object>();
- m.put(Utils.ENTITY_BYTES, data);
- String html;
- try {
- html = HttpClientUtil.post(HttpConfig.custom().client(client).url(apiUrl).headers(headers).map(m));
- //System.out.println(html);
- String[] results = StringUtil.regex("<Result>([^<]*)</Result>\\s*<Status>([^<]*)</Status>", html);
- if(results.length>0){
- //System.out.println(results[0]);
- if(limitCodeLen<=0 || limitCodeLen==results[0].length()){//不判断长度或者长度一致时,直接返回
- return results[0];
- }
- }
- } catch (HttpProcessException e) {
- e.printStackTrace();
- }
- return "Error:获取失败!";
- }
- /**
- * 直接获取网络验证码(验证码不刷新)
- *
- * @param imgUrl 验证码地址
- * @return
- */
- public static String ocrCode4Net(String imgUrl){
- return ocrCode4Net(imgUrl, 0);
- }
- /**
- * 直接获取网络验证码(验证码不刷新)
- *
- * @param imgUrl 验证码地址
- * @param limitCodeLen 验证码长度
- * @return
- */
- public static String ocrCode4Net(String imgUrl, int limitCodeLen){
- Map<String, Object> map = getParaMap();
- map.put("url", imgUrl);
- Header[] headers = HttpHeader.custom().userAgent("Mozilla/5.0 (Windows NT 5.1; zh-CN; rv:1.9.1.3) Gecko/20100101 Firefox/8.0").build();
- try {
- String html = HttpClientUtil.post(HttpConfig.custom().client(client).url(apiUrl).headers(headers).map(map));
- //System.out.println(html);
- String[] results = StringUtil.regex("<Result>([^<]*)</Result>\\s*<Status>([^<]*)</Status>", html);
- if(results.length>0){
- //System.out.println(results[0]);
- if(limitCodeLen<=0 || limitCodeLen==results[0].length()){//不判断长度或者长度一致时,直接返回
- return results[0];
- }
- }
- } catch (HttpProcessException e) {
- e.printStackTrace();
- }
- return "Error:获取失败!";
- }
- /**
- * 直接获取网络验证码(通过获取图片流,然后识别验证码)
- *
- * @param config HttpConfig对象(设置cookie)
- * @param savePath 图片保存的完整路径(值为null时,不保存),如:c:/1.png
- * @return
- */
- public static String ocrCode4Net(HttpConfig config, String savePath){
- return ocrCode4Net(config, savePath, 0);
- }
- /**
- * 直接获取网络验证码(通过获取图片流,然后识别验证码)
- *
- * @param config HttpConfig对象(设置cookie)
- * @param savePath 图片保存的完整路径(值为null时,不保存),如:c:/1.png
- * @param limitCodeLen 验证码长度
- * @return
- */
- @SuppressWarnings("resource")
- public static String ocrCode4Net(HttpConfig config, String savePath, int limitCodeLen){
- byte[] data = null;
- StringBuffer strBuf = new StringBuffer();
- for (Entry<String, Object> entry : map.entrySet()) {
- strBuf.append("\r\n").append("--").append(boundary).append("\r\n");
- strBuf.append("Content-Disposition: form-data; name=\"" + entry.getKey() + "\"\r\n\r\n");
- strBuf.append(entry.getValue());
- }
- strBuf.append("\r\n").append("--").append(boundary).append("\r\n");
- strBuf.append("Content-Disposition: form-data; name=\"ocrfile\"; filename=\"" + "aaa" + "\"\r\n");
- strBuf.append("Content-Type:application/octet-stream\r\n\r\n");
- //下载图片
- ByteArrayOutputStream out = new ByteArrayOutputStream();
- try {
- out = (ByteArrayOutputStream) HttpClientUtil.down(config.client(client).out(out));
- if(savePath==null || savePath.equals("")){
- }else{
- //本地测试,可以保存一下图片,方便核验
- FileOutputStream fos = new FileOutputStream(savePath);
- fos.write(out.toByteArray());
- }
- ByteArrayOutputStream bos = new ByteArrayOutputStream(out.size()+strBuf.length()+end.length());
- bos.write(strBuf.toString().getBytes());
- bos.write(out.toByteArray());
- bos.write(end.getBytes());
- data= bos.toByteArray();
- } catch (HttpProcessException e) {
- e.printStackTrace();
- } catch (IOException e) {
- e.printStackTrace();
- }
- Map<String , Object> m = new HashMap<String, Object>();
- m.put(Utils.ENTITY_BYTES, data);
- String html;
- try {
- html = HttpClientUtil.post(config.client(client).url(apiUrl).headers(headers).map(m));
- //System.out.println(html);
- String[] results = StringUtil.regex("<Result>([^<]*)</Result>\\s*<Status>([^<]*)</Status>", html);
- if(results.length>0){
- //System.out.println(results[0]);
- if(limitCodeLen<=0 || limitCodeLen==results[0].length()){//不判断长度或者长度一致时,直接返回
- return results[0];
- }
- }
- } catch (HttpProcessException e) {
- e.printStackTrace();
- }
- return "Error:获取失败!";
- }
- }
上面代码中用到了StringUtil.regex方法,具体如下:
- /**
- * 通过正则表达式获取内容
- *
- * @param regex 正则表达式
- * @param from 原字符串
- * @return
- */
- public static String[] regex(String regex, String from){
- Pattern pattern = Pattern.compile(regex);
- Matcher matcher = pattern.matcher(from);
- List<String> results = new ArrayList<String>();
- while(matcher.find()){
- for (int i = 0; i < matcher.groupCount(); i++) {
- results.add(matcher.group(i+1));
- }
- }
- return results.toArray(new String[]{});
- }
/**
 * Minimal utility for reading {@code .properties} files from the classpath.
 *
 * <p>Each file is loaded at most once and cached by file name. A file that cannot be
 * found or parsed yields an empty (or partially filled) {@link Properties} and an error
 * message on {@code System.err}; no exception is thrown.
 *
 * @author arron
 * @date 2016-01-14 17:37:18
 * @version 1.0
 */
public class PropertiesUtil {

    /**
     * Default property set, loaded from {@code config.properties}.
     */
    protected static Properties defaultProp = null;

    /**
     * All property sets loaded so far: file name -> properties.
     */
    protected static Map<String, Properties> allProps = new HashMap<String, Properties>();

    // Load the default property set once, when the class is initialized.
    static {
        if (defaultProp == null) {
            defaultProp = loadProperties("config.properties");
            allProps.put("config.properties", defaultProp);
        }
    }

    /**
     * Returns the properties of the given file, loading and caching them on first use.
     *
     * @param fileName classpath resource name; {@code null} or empty selects the default file
     * @return the cached properties for that file; never {@code null}
     */
    public static Properties getProperties(String fileName) {
        if (fileName == null || "".equals(fileName)) {
            return defaultProp;
        }
        Properties prop = allProps.get(fileName);
        if (prop == null) {
            prop = loadProperties(fileName);
            allProps.put(fileName, prop);
        }
        return prop;
    }

    /**
     * Parses a properties file from the classpath into a new {@link Properties}.
     *
     * @param fileName classpath resource name
     * @return the loaded properties; empty when the resource is missing
     */
    protected static Properties loadProperties(String fileName) {
        Properties prop = new Properties();
        InputStream ins = PropertiesUtil.class.getClassLoader().getResourceAsStream(fileName);
        if (ins == null) {
            System.err.println("Can not find the resource!");
            return prop;
        }
        try {
            prop.load(ins);
        } catch (IOException e) {
            System.err.println("An error occurred when reading from the input stream, "+e.getMessage());
        } catch (IllegalArgumentException e) {
            System.err.println("The input stream contains a malformed Unicode escape sequence, "+e.getMessage());
        } finally {
            // Properties.load leaves the stream open, so it has to be closed here.
            try {
                ins.close();
            } catch (IOException ignored) {
                // Nothing useful can be done if closing a fully consumed stream fails.
            }
        }
        return prop;
    }

    /**
     * Looks up a property in the given file.
     *
     * @param fileName classpath resource name; {@code null} or empty selects the default file
     * @param name     property key
     * @return the property value, or {@code null} when the key is not present
     */
    public static String getProperty(String fileName, String name){
        return getProperties(fileName).getProperty(name);
    }

    /**
     * Looks up a property in the default file.
     *
     * @param name property key
     * @return the property value, or {@code null} when the key is not present
     */
    public static String getProperty(String name){
        return getProperties(null).getProperty(name);
    }
}
- public static void main(String[] args) throws InterruptedException, HttpProcessException {
- String qq = "123456789";//qq号
- String imgUrl = "http://qqxoo.com/include/vdimgvt.php?t="+Math.random(); //获取验证码图片地址
- String verifyUrl = "http://qqxoo.com/include/vdcheck.php";
- String saveCodePath = "C:/1.png";//保存验证码图片路径
- Header[] headers = HttpHeader.custom().referer("http://qqxoo.com/main.html?qqid="+qq).build();//设置referer,是为了获取对应qq号的验证码,否则报错
- HttpConfig config = HttpConfig.custom().headers(headers).context(HttpCookies.custom().getContext());//必须设置context,是为了携带cookie进行操作
- String result =null;//识别结果
- do {
- if(result!=null){
- System.err.println("本次识别失败!");
- }
- //获取验证码
- //OCR.debug(); //开始Fiddler4抓包(127.0.0.1:8888)
- String code = OCR.ocrCode4Net(config.url(imgUrl), saveCodePath);
- while(code.length()!=5){//如果识别的验证码位数不等于5,则重新识别
- if(code.equals("亲,apiKey已经过期或错误,请重新获取")){
- System.err.println(code);
- return;
- }
- code = OCR.ocrCode4Net(config.url(imgUrl), saveCodePath);
- }
- System.out.println("本地识别的验证码为:"+code);
- System.out.println("验证码已保存到:"+saveCodePath);
- //开始验证识别的验证码是否正确
- result = HttpClientUtil.get(config.url(verifyUrl+"?vc="+code+"&qqid="+qq));
- } while (result.contains("succeed"));
- System.out.println("识别验证码成功!反馈信息如下:\n" + result);
- }