我使用
this answer中描述的JS脚本代码,但我不想在html文件中保存html结果页面.我想从< div class =“rg_Meta”>中提取Json对象.并将它们传递给Java代码.
在搜索中,我发现使用“文档”,但我得到未定义的错误.我是PhantomJS的新手,在Java中使用JSON.
var page = require('webpage').create();
var fs = require('fs');
var system = require('system');
var url = "";
var searchParameter = "";
var count=0;
if (system.args.length === 4) {
url=system.args[1];
searchParameter=system.args[2];
count=system.args[3];
}
if(url==="" || searchParameter===""){
phantom.exit();
}
page.settings.userAgent = 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML,like Gecko) Chrome/37.0.2062.120 Safari/537.36';
page.zoomFactor = 0.1;
page.viewportSize = {
width: 1920,height: 1080
};
var divCount="-1";
var topPosition=0;
var unchangedCounter=0;
page.open(url,function(status) {
console.log("Status: " + status);
if(status === "success") {
window.setInterval(function() {
var newDivCount = page.evaluate(function() {
var divs = document.querySelectorAll(".rg_di.rg_bx.rg_el.ivg-i");
return divs[divs.length-1].getAttribute("data-ri");
});
topPosition = topPosition + 1080;
page.scrollPosition = {
top: topPosition,left: 0
};
if(newDivCount===divCount){
page.evaluate(function() {
var elems=document.getElementByClassName("rg_Meta");
console.log(elems.length);
var button = document.querySelector("#smb");
if(!(typeof button === "undefined")) {
button.click();
console.log('Clicked');
return true;
}else{
return false;
}
});
if(parseInt(unchangedCounter,10) === parseInt(count,10)){
/* var path = searchParameter+'.html';
fs.write('seedHtml/'+path,page.content,'w');
console.log('printing html');*/
phantom.exit();
}else{
unchangedCounter=unchangedCounter+1;
}
}else{
unchangedCounter=0;
}
divCount = newDivCount;
},500);
}else{
phantom.exit();
}
});