经过以上的铺垫,我们就差最后一步了——破解!首先我们来分析一下要做的事情:
1、加载包含验证码的页面,当然是用我们前面讲的PhantomJS来加载啦,因为极验验证码是依赖于js渲染的,我们必须等页面完全渲染完成后再执行拖动
2、收集一些页面的参数发送到java后台服务计算滑块的目标位移并接收结果
3、通过js模拟鼠标事件来实现滑块的移动
4、输出验证结果
好,让我们一步步来讲解如何实现上面的目标。
我们首先新建一个js文件,就叫做geetest_refresh.js好了,我们首先写一些样板代码,比如创建对象,日志处理和接收传进来的参数:
var utils = require('utils');

// Options handed to the CasperJS factory. Handlers that only contained
// commented-out debug output are kept as empty functions so the set of
// registered callbacks is unchanged.
var casperOptions = {
    //clientScripts: ["jquery-2.1.3.min.js"],
    // Settings applied to the WebPage instance that Casper drives.
    pageSettings: {
        javascriptEnabled: true,
        XSSAuditingEnabled: true,
        loadImages: true,
        loadPlugins: false,
        userAgent: "Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/47.0.2526.111 Safari/537.36"
    },
    waitTimeout: 10000,
    exitOnError: false,
    httpStatusHandlers: {
        404: function () {
            console.log(404);
        }
    },
    onAlert: function (msg) {
        console.log(msg);
    },
    // Any script error is reported and terminates the run.
    onError: function (self, m) {
        console.log("FATAL:" + m);
        self.exit();
    },
    onDie: function () {
        console.log('dieing');
    },
    onLoadError: function (casper, url) {
        console.log(url + ' can\'t be loaded');
    },
    onPageInitialized: function () {
    },
    onResourceReceived: function () {
        // intentionally silent (was: log arguments[1]['url'] + ' Received')
    },
    onResourceRequested: function () {
        // intentionally silent (was: log arguments[1]['url'] + ' requested')
    },
    onStepComplete: function () {
        // intentionally silent
    },
    onStepTimeout: function () {
        console.log('timeout');
    },
    logLevel: "debug", // minimum level of messages recorded by casper.log()
    verbose: false     // whether log messages are printed to the console as they occur
};

var casper = require('casper').create(casperOptions);

// Forward console output produced inside the page to Casper's own log.
casper.on('remote.message', function (msg) {
    this.log(msg, 'info');
});

// Positional command-line arguments.
var pageUrl = casper.cli.get(0);            // URL of the page containing the captcha
var deltaResolveServer = casper.cli.get(1); // address of the slider-offset resolving service

// Internal state shared between steps.
var id = new Date().getTime(); // run identifier, used in the screenshot file name
var pageParam = null;          // image parameters collected from the page
然后是实现第一个目标:加载并渲染页面(这里还对页面做了一个判断,是否包含极验验证码):
// Step 1: open the target page, then pause so that the captcha widget,
// which is rendered by JavaScript, has time to appear.
casper.start(pageUrl);

casper.then(function () {
    this.wait(5000, function () {
        // nothing to do: the 5 second pause only lets the page finish rendering
    });
});

// Stop right away when the page carries no Geetest slider at all.
casper.then(function () {
    if (this.exists(".gt_slider_knob")) {
        return;
    }
    this.echo("页面中不存在极验验证码模块");
    this.exit();
});
第二个目标:收集参数请求滑块位置:
/**
 * Reads the inline `style` attribute of every element matching `selector` and
 * splits it into the background image URL and the background position.
 * Styles that do not match `styleReg` are echoed for diagnosis and skipped.
 *
 * @param {Object} self      the Casper instance used to query the page
 * @param {string} selector  CSS selector of the image slices
 * @param {RegExp} styleReg  pattern capturing (1) the image URL and (2) the position
 * @returns {{srcArray: Array, positionArray: Array}} parallel arrays, one entry per matched slice
 */
function collectSliceStyles(self, selector, styleReg) {
    var srcArray = [];
    var positionArray = [];
    var styles = self.getElementsAttribute(selector, 'style');
    for (var i = 0; i < styles.length; i++) {
        var match = styleReg.exec(styles[i]);
        if (match != null) {
            srcArray.push(match[1]);
            positionArray.push(match[2]);
        } else {
            self.echo(styles[i]);
        }
    }
    return {srcArray: srcArray, positionArray: positionArray};
}

// Step 2a: wait until both slice sets (52 pieces each) are present, then
// collect the parameters the resolving service needs to rebuild the images.
casper.waitFor(function check() {
    return this.evaluate(function () {
        // The background images are considered rendered once all slices exist.
        return (document.querySelectorAll('.gt_cut_bg_slice').length == 52)
            && (document.querySelectorAll('.gt_cut_fullbg_slice').length == 52);
    });
}, function then() {
    this.echo("页面渲染成功!");
    var styleReg = new RegExp("background-image: url\\((.*?)\\); background-position: (.*?);");
    var fullbg = collectSliceStyles(this, '.gt_cut_fullbg_slice', styleReg);
    var bg = collectSliceStyles(this, '.gt_cut_bg_slice', styleReg);

    var data = {};
    data.fullbgSrcArray = fullbg.srcArray;
    data.fullbgPositionArray = fullbg.positionArray;
    data.bgSrcArray = bg.srcArray;
    data.bgPositionArray = bg.positionArray;
    data.itemWidth = 10;     // width of one slice in pixels
    data.itemHeight = 58;    // height of one slice in pixels
    data.lineItemCount = 26; // number of slices per row of the puzzle
    pageParam = data;
}, function () {
    this.echo("等待渲染超时!");
    // Fixed: the original called the non-existent `this.exist()`, which threw a
    // TypeError instead of terminating the run.
    this.exit();
}, 10000);

// Step 2b: ask the backend service for the slider's target offset.
var deltaX = 0;
casper.then(function () {
    if (pageParam == null) {
        this.echo("收集图片参数失败!");
        this.exit();
        // exit() does not abort the current function; return so that no request
        // is sent with empty parameters.
        return;
    }
    this.echo("开始请求滑块位置");
    var result = casper.evaluate(function (url, param) {
        // Synchronous AJAX call issued from inside the page context.
        return JSON.parse(__utils__.sendAJAX(url, 'POST', param, false));
    }, deltaResolveServer, {"params": JSON.stringify(pageParam)});

    if (result != null && result.status == 1 && result.data != null) {
        deltaX = result.data.deltaX;
        this.echo("滑块位置求解成功:" + JSON.stringify(result.data));
    } else {
        this.echo("请求滑块位置失败:" + JSON.stringify(result));
        this.exit();
    }
});
其中滑块位置求解后台服务也就是一个Spring的controller而已:
package com.yay.geetestIdentification.controller;

import com.alibaba.fastjson.JSON;
import com.yay.geetestIdentification.model.RestFulResult;
import com.yay.geetestIdentification.utils.ImageUtils;
import org.apache.commons.collections.MapUtils;
import org.apache.commons.collections.map.HashedMap;
import org.springframework.util.Assert;
import org.springframework.web.bind.annotation.RequestMapping;
import org.springframework.web.bind.annotation.RequestMethod;
import org.springframework.web.bind.annotation.RestController;

import javax.servlet.http.HttpServletResponse;
import java.io.File;
import java.util.ArrayList;
import java.util.List;
import java.util.Map;
import java.util.UUID;

/**
 * REST endpoint that rebuilds the two captcha images from their slices and
 * returns the horizontal offset at which they differ.
 */
@RestController
public class CaptchaController {

    /**
     * Resolves the slider's target offset.
     *
     * @param response used only to add the CORS header
     * @param params   JSON object with the keys fullbgSrcArray, fullbgPositionArray,
     *                 bgSrcArray, bgPositionArray, itemWidth, itemHeight and lineItemCount
     * @return a success result carrying deltaX/deltaY, or a failure result with a message
     */
    @RequestMapping(value = "resolveGeetestSlicePosition", method = RequestMethod.POST)
    public RestFulResult resolveGeetestSlicePosition(HttpServletResponse response, String params) {
        response.addHeader("Access-Control-Allow-Origin", "*");
        Map<String, Object> paramMap = (Map<String, Object>) JSON.parseObject(params, Map.class);
        if (paramMap == null)
            return RestFulResult.failure("参数不能为空!");

        List<String> fullbgSrcList = (List<String>) paramMap.get("fullbgSrcArray");
        List<String> fullbgPositionList = (List<String>) paramMap.get("fullbgPositionArray");
        List<String> bgSrcList = (List<String>) paramMap.get("bgSrcArray");
        List<String> bgPositionList = (List<String>) paramMap.get("bgPositionArray");
        int itemWidth = MapUtils.getIntValue(paramMap, "itemWidth");
        int itemHeight = MapUtils.getIntValue(paramMap, "itemHeight");
        int lineItemCount = MapUtils.getIntValue(paramMap, "lineItemCount");

        // Declared outside the try block so the finally clause can always clean up.
        String fullbgImagePath = null;
        String bgImagePath = null;
        try {
            Assert.notEmpty(fullbgSrcList);
            Assert.notEmpty(fullbgPositionList);
            Assert.notEmpty(bgSrcList);
            Assert.notEmpty(bgPositionList);
            Assert.isTrue(fullbgSrcList.size() == 52);
            Assert.isTrue(bgSrcList.size() == 52);
            Assert.isTrue(itemWidth > 0);
            Assert.isTrue(lineItemCount > 0);
            Assert.isTrue(itemHeight > 0);

            String tmpFolder = System.getProperty("user.dir") + "/tmp/";
            File folder = new File(tmpFolder);
            if (!folder.exists())
                folder.mkdirs();

            // A timestamp alone can collide when two requests arrive within the
            // same millisecond; the random UUID makes the file names unique.
            String identification = System.currentTimeMillis() + "_" + UUID.randomUUID();
            String imageSubfix = "jpg";

            List<String[]> fullbgPointList = toPointList(fullbgPositionList);
            List<String[]> bgPointList = toPointList(bgPositionList);

            fullbgImagePath = tmpFolder + identification + "_fullbg." + imageSubfix;
            bgImagePath = tmpFolder + identification + "_bg." + imageSubfix;

            if (ImageUtils.combineImages(fullbgSrcList, fullbgPointList, lineItemCount, itemWidth, itemHeight, fullbgImagePath, imageSubfix)
                    && ImageUtils.combineImages(bgSrcList, bgPointList, lineItemCount, itemWidth, itemHeight, bgImagePath, imageSubfix)) {
                int deltaX = ImageUtils.findXDiffRectangeOfTwoImage(fullbgImagePath, bgImagePath);
                Map<String, Object> resultMap = new HashedMap();
                resultMap.put("deltaX", deltaX);
                resultMap.put("deltaY", 0);
                return RestFulResult.success(resultMap);
            } else {
                return RestFulResult.failure("合成图片失败!");
            }
        } catch (Exception ex) {
            return RestFulResult.failure(ex.getMessage());
        } finally {
            // Remove the cached images on every path; previously they were only
            // deleted on success and leaked on failure or exception.
            deleteImage(fullbgImagePath);
            deleteImage(bgImagePath);
        }
    }

    /** Converts CSS positions such as "-157px -58px" into ["-157", "-58"] pairs. */
    private List<String[]> toPointList(List<String> positionList) {
        List<String[]> pointList = new ArrayList<>();
        for (String positionStr : positionList) {
            pointList.add(positionStr.replace("px", "").split(" "));
        }
        return pointList;
    }

    /** Deletes the file at the given path if it exists and is a regular file; null is ignored. */
    private void deleteImage(String fullbgImagePath) {
        if (fullbgImagePath == null)
            return;
        File file = new File(fullbgImagePath);
        if (file.isFile()) {
            file.delete();
        }
    }
}
第三个目标,实现滑块移动到目标位置:
// Step 3: drag the slider knob to the resolved offset by dispatching synthetic
// mouse events inside the page, then report the verification result.
var currentTrailIndex = 0;
casper.then(function () {
    if (deltaX <= 0) {
        this.echo("滑块目标位移为0:处理失败");
        this.exit();
        // exit() does not abort the current function; return so that no drag is attempted.
        return;
    }
    this.echo("开始移动滑块,目标位移为 " + deltaX);
    currentTrailIndex = this.evaluate(function (selector, deltaX) {
        // Builds a mouse event whose client coordinates are (ofsx, ofsy).
        var createEvent = function (eventName, ofsx, ofsy) {
            var evt = document.createEvent('MouseEvents');
            evt.initMouseEvent(eventName, true, false, null, 0, 0, 0, ofsx, ofsy,
                false, false, false, false, 0, null);
            return evt;
        };
        var trailArray = [
            // Trail data is intentionally omitted by the author of the article.
            // Each trail is read below as a list of [dx, dy, delayMs] entries.
        ];
        if (trailArray.length === 0) {
            // Without trail data the original failed with an opaque TypeError.
            console.log('未配置鼠标轨迹数据,无法移动滑块');
            return -1;
        }
        // floor() gives every trail the same probability; round() favoured the middle ones.
        var trailIndex = Math.floor(Math.random() * trailArray.length);
        var deltaArray = trailArray[trailIndex];
        console.log('当前使用轨迹路径:' + (trailIndex + 1));

        var delta = deltaX - 7; // distance to move; the original subtracts 7 to avoid overshooting
        delta = delta > 200 ? 200 : delta;

        // Locate the element to drag.
        var obj = document.querySelector(selector);
        var startX = obj.getBoundingClientRect().left + 20;
        var startY = obj.getBoundingClientRect().top + 18;
        var nowX = startX;
        var nowY = startY;
        console.log("startX:" + startX);
        console.log("startY:" + startY);

        // Snaps to the exact target and releases the mouse button.
        var finish = function () {
            obj.dispatchEvent(createEvent('mousemove', startX + delta, nowY));
            obj.dispatchEvent(createEvent('mouseup', startX + delta, nowY));
            console.log("最终滑块位置:" + obj.getBoundingClientRect().left);
        };

        var moveToTarget = function (loopRec) {
            if (loopRec >= deltaArray.length) {
                // Trail exhausted before reaching the target: finish instead of
                // reading past the end of the array and never sending mouseup.
                finish();
                return;
            }
            setTimeout(function () {
                nowX = nowX + deltaArray[loopRec][0];
                nowY = nowY + deltaArray[loopRec][1];
                obj.dispatchEvent(createEvent('mousemove', nowX, nowY));
                console.log("当前滑块位置:" + obj.getBoundingClientRect().left);
                if (nowX > (startX + delta - 2)) {
                    finish();
                } else {
                    moveToTarget(loopRec + 1);
                }
            }, deltaArray[loopRec][2]);
        };

        obj.dispatchEvent(createEvent("mousedown", startX, startY));
        // NOTE(review): playback starts at index 2 as in the original; presumably
        // the first two entries of a trail are not movement samples — confirm.
        moveToTarget(2);
        return trailIndex;
    }, ".gt_slider_knob", deltaX);
}).then(function () {
    casper.waitForSelectorTextChange('.gt_info_type', function () {
        var status = this.fetchText('.gt_info_type');
        this.echo("验证结果:" + status);
        // Screenshot of the current page, kept for later review.
        this.capture(status.replace(":", "_") + id + "_" + currentTrailIndex + '.png');
        if (status.indexOf("通过") > -1) {
            if (this.exists('#verify')) {
                this.click("#verify");
                this.echo("点击成功");
            }
        }
    }, function () {
        this.echo("等待滑块移动超时!");
    }, 10000);
});
代码中的trailArray 保存着到目标位移的移动轨迹数据,也就是说先到哪个位置,再到哪个位置……。大家都知道极验验证码最难的就是对这个轨迹做了行为检测来区分人和机器人,因此这个数据相当重要,为了不给极验团队带来太多麻烦,我这里就省略了,毕竟人家也要吃饭啦。
最后一个目标,执行以上的操作并返回结果:
// Start executing the steps registered above.
casper.run();
没错,就一行代码,上面脚本中的所有输出文字都可以在java代码中接收,然后判断是否验证成功,而且可以把验证结果的网页截图保存下来:
/**
 * Launches the CasperJS script and decides from its console output whether
 * the captcha was handled.
 *
 * @return true when the output contains "验证通过" or "不存在极验验证码";
 *         false otherwise, including when the launcher returns null
 */
private static boolean startIdentification(String pageUrl, String domain, String cookies, String jsFileName, String deltaResolveAddress) {
    final String output = CasperjsProgramManager.launch(
            jsFileName, pageUrl, deltaResolveAddress, domain, cookies,
            " web-security=no", "ignore-ssl-errors=true");
    logger.info("验证码识别结果:\r\n" + output);
    if (output == null) {
        return false;
    }
    // A page without a captcha counts as success as well.
    return output.contains("验证通过") || output.contains("不存在极验验证码");
}
运行结果:
[info] [phantom] Step then 7/10 http://user.geetest.com/login?url=http://account.geetest.com/report (HTTP 200) 页面渲染成功! [info] [phantom] Step then 7/10: done in 69935ms. [info] [phantom] Step anonymous 8/10 http://user.geetest.com/login?url=http://account.geetest.com/report (HTTP 200) 开始请求滑块位置 [debug] [remote] sendAJAX(): Using HTTP method: 'POST' 滑块位置求解成功:{"deltaX":119,"deltaY":0} [info] [phantom] Step anonymous 8/10: done in 80502ms. [info] [phantom] Step anonymous 9/10 http://user.geetest.com/login?url=http://account.geetest.com/report (HTTP 200) 开始移动滑块,目标位移为 119 [info] [phantom] 当前使用轨迹路径:2 [info] [phantom] startX:51.03125 [info] [phantom] startY:292 [info] [phantom] Step anonymous 9/10: done in 80514ms. [info] [phantom] Step anonymous 10/10 http://user.geetest.com/login?url=http://account.geetest.com/report (HTTP 200) [info] [phantom] Step anonymous 10/10: done in 80528ms. [info] [phantom] Step _step 11/11 http://user.geetest.com/login?url=http://account.geetest.com/report (HTTP 200) [info] [phantom] Step _step 11/11: done in 80547ms. 
[info] [phantom] 当前滑块位置:33.03125 [info] [phantom] 当前滑块位置:33.03125 [info] [phantom] 当前滑块位置:34.03125 [info] [phantom] 当前滑块位置:35.03125 [info] [phantom] 当前滑块位置:36.03125 [info] [phantom] 当前滑块位置:37.03125 [info] [phantom] 当前滑块位置:38.03125 [info] [phantom] 当前滑块位置:39.03125 [info] [phantom] 当前滑块位置:40.03125 [info] [phantom] 当前滑块位置:41.03125 [info] [phantom] 当前滑块位置:44.03125 [info] [phantom] 当前滑块位置:46.03125 [info] [phantom] 当前滑块位置:47.03125 [info] [phantom] 当前滑块位置:48.03125 [info] [phantom] 当前滑块位置:49.03125 [info] [phantom] 当前滑块位置:50.03125 [info] [phantom] 当前滑块位置:51.03125 [info] [phantom] 当前滑块位置:53.03125 [info] [phantom] 当前滑块位置:55.03125 [info] [phantom] 当前滑块位置:56.03125 [info] [phantom] 当前滑块位置:58.03125 [info] [phantom] 当前滑块位置:60.03125 [info] [phantom] 当前滑块位置:61.03125 [info] [phantom] 当前滑块位置:64.03125 [info] [phantom] 当前滑块位置:66.03125 [info] [phantom] 当前滑块位置:67.03125 [info] [phantom] 当前滑块位置:68.03125 [info] [phantom] 当前滑块位置:69.03125 [info] [phantom] 当前滑块位置:71.03125 [info] [phantom] 当前滑块位置:73.03125 [info] [phantom] 当前滑块位置:75.03125 [info] [phantom] 当前滑块位置:76.03125 [info] [phantom] 当前滑块位置:77.03125 [info] [phantom] 当前滑块位置:78.03125 [info] [phantom] 当前滑块位置:79.03125 [info] [phantom] 当前滑块位置:81.03125 [info] [phantom] 当前滑块位置:83.03125 [info] [phantom] 当前滑块位置:85.03125 [info] [phantom] 当前滑块位置:86.03125 [info] [phantom] 当前滑块位置:87.03125 [info] [phantom] 当前滑块位置:88.03125 [info] [phantom] 当前滑块位置:89.03125 [info] [phantom] 当前滑块位置:90.03125 [info] [phantom] 当前滑块位置:91.03125 [info] [phantom] 当前滑块位置:92.03125 [info] [phantom] 当前滑块位置:94.03125 [info] [phantom] 当前滑块位置:95.03125 [info] [phantom] 当前滑块位置:96.03125 [info] [phantom] 当前滑块位置:97.03125 [info] [phantom] 当前滑块位置:98.03125 [info] [phantom] 当前滑块位置:100.03125 [info] [phantom] 当前滑块位置:103.03125 [info] [phantom] 当前滑块位置:104.03125 [info] [phantom] 当前滑块位置:105.03125 [info] [phantom] 当前滑块位置:106.03125 [info] [phantom] 当前滑块位置:108.03125 [info] [phantom] 当前滑块位置:110.03125 [info] [phantom] 当前滑块位置:114.03125 [info] [phantom] 当前滑块位置:116.03125 [info] [phantom] 当前滑块位置:118.03125 [info] 
[phantom] 当前滑块位置:119.03125 [info] [phantom] 当前滑块位置:121.03125 [info] [phantom] 当前滑块位置:122.03125 [info] [phantom] 当前滑块位置:124.03125 [info] [phantom] 当前滑块位置:125.03125 [info] [phantom] 当前滑块位置:127.03125 [info] [phantom] 当前滑块位置:129.03125 [info] [phantom] 当前滑块位置:130.03125 [info] [phantom] 当前滑块位置:134.03125 [info] [phantom] 当前滑块位置:135.03125 [info] [phantom] 当前滑块位置:136.03125 [info] [phantom] 当前滑块位置:137.03125 [info] [phantom] 当前滑块位置:138.03125 [info] [phantom] 当前滑块位置:139.03125 [info] [phantom] 当前滑块位置:140.03125 [info] [phantom] 当前滑块位置:142.03125 [info] [phantom] 最终滑块位置:143.03125 [info] [phantom] waitFor() finished in 1913ms. [info] [phantom] Step anonymous 12/12 http://user.geetest.com/login?url=http://account.geetest.com/report (HTTP 200) 验证结果:验证通过: [debug] [phantom] Capturing page to D:/yayCrawler/demo/GeetestIdentification/target/classes/casperjs/js/验证通过_1467992089127_1.png
验证通过的截图为: