phantomjs

最近在做爬虫,由于网页上有很多 ajax 请求,可以用 httpclient 逐个找到相应的 ajax 接口再去请求,但这样做比较麻烦;改用 phantomjs 之后,可以直接获取到页面渲染后的全部内容。
/**
 * Runs the PhantomJS script against {@code url} and returns everything it printed to stdout.
 *
 * <p>Launches {@code phantomjs.exe} with the {@code test.js} script and passes {@code url} as the
 * script's single argument. Every stdout line is appended followed by {@code "\n"}, and a dashed
 * separator line is appended after the last one.
 *
 * @param url address handed to the script as its first command-line argument
 * @return the captured stdout text followed by the dashed separator
 * @throws IOException if the process cannot be started or its output cannot be read
 */
public static String dealWithAjax(String url) throws IOException {
    // Pass the arguments as a list: Runtime.exec(String) tokenizes the command on whitespace,
    // so a URL containing a space would be split into several arguments.
    ProcessBuilder builder = new ProcessBuilder(
            "phantomjs.exe", "D:\\rj\\RedisImportMysql\\src\\js\\test.js", url);
    // Forward the child's stderr to this JVM's stderr. Left unread, the stderr pipe can fill up
    // and block the child; forwarding also keeps the returned text stdout-only, as before.
    builder.redirectError(ProcessBuilder.Redirect.INHERIT);
    Process process = builder.start();

    StringBuilder output = new StringBuilder();
    // Decode explicitly as UTF-8 (PhantomJS's documented default output encoding) instead of
    // relying on the platform default charset, which garbles non-ASCII page content.
    try (BufferedReader reader = new BufferedReader(
            new InputStreamReader(process.getInputStream(), "UTF-8"))) {
        String line;
        while ((line = reader.readLine()) != null) {
            output.append(line).append('\n');
        }
    } finally {
        // Release the process handles even when reading fails part-way through.
        process.destroy();
    }
    output.append("-----------------------------------------------------------------------");
    return output.toString();
}

/**
 * Demo entry point: fetches one Tmall search page through {@code dealWithAjax}, echoes the
 * captured text to stdout and passes it to {@code FileUtil.writeFile}.
 *
 * @param args unused
 * @throws IOException propagated from {@code dealWithAjax}
 */
public static void main(String[] args) throws IOException {
    // Alternative target tried earlier: http://shixin.court.gov.cn/personMore.do
    final String searchUrl = "http://list.tmall.com/search_product.htm?q=%CA%D6%BB%FA&type=p&vmarket=&spm=a222r.7716956.a2227oh.d100&from=..pc_1_searchbutton";
    final String rendered = dealWithAjax(searchUrl);
    System.out.println(rendered);
    // NOTE(review): the trailing `true` is presumably an append flag -- confirm against FileUtil.
    FileUtil.writeFile("d://a.txt", "utf-8", rendered, true);
}


test.js源码:
// codes.js
// Script setup: reads the target URL from the command line and creates the page
// object used by the rest of the script.
// Everything is declared with `var` so that no name leaks out as an implicit global
// (PhantomJS runs ES5, hence `var` rather than `let`/`const`).
var system = require('system');
// Second entry of system.args, i.e. the first argument after the script name.
var address = system.args[1];
var page = require('webpage').create();
var url = address;
// Most recent URL reported through page.onUrlChanged.
var newUrl;
// Load the target URL; once the load attempt completes, print the page's markup.
page.open(url, function (loadStatus) {
    // Guard clause: any status other than 'success' is reported and nothing is extracted.
    if (loadStatus !== 'success') {
        console.log('Unable to post!');
        return;
    }
    // Evaluated inside the page. The root element is the first element a "*" selector
    // matches, so this returns the same markup as querySelector("*").innerHTML.
    var markup = page.evaluate(function () {
        return document.documentElement.innerHTML;
    });
    console.log(markup);
});
// Load-finished hook: prints two marker lines and then terminates PhantomJS.
// Earlier scrolling / synthetic key-event experiments that lived here were disabled.
page.onLoadFinished = function () {
    var markers = [
        "执行完毕。。。。。。。。",
        "事件。。。。。。。。。。。。。。。。。。。。。。。。。。。"
    ];
    for (var i = 0; i < markers.length; i += 1) {
        console.log(markers[i]);
    }
    phantom.exit();
};
// Navigation hook: remembers the most recent URL in `newUrl` and logs it.
// A disabled experiment used to re-open Baidu search result URLs from here.
page.onUrlChanged = function (nextUrl) {
    newUrl = nextUrl;
    console.log('New URL: ' + nextUrl);
};
// Resource hook: logs the page's current URL and the URL of the received resource.
// NOTE(review): PhantomJS documents this callback as firing per response stage
// ('start' / 'end'), so each resource may be logged more than once -- confirm.
page.onResourceReceived = function (received) {
    var pageLine = 'resource rec page.url---' + page.url;
    var resourceLine = 'reponse url---' + received.url;
    console.log(pageLine);
    console.log(resourceLine);
};

// Resource-failure hook: logs which resource failed, then its error code and description.
page.onResourceError = function (failure) {
    var summary = 'Unable to load resource (#' + failure.id + 'URL:' + failure.url + ')';
    var detail = 'Error code: ' + failure.errorCode + '. Description: ' + failure.errorString;
    console.log(summary);
    console.log(detail);
};


// Script-level error hook: writes the message (plus a stack trace when one is supplied)
// to stderr and exits with status 1.
phantom.onError = function (msg, trace) {
    // Renders one trace frame as " -> <file>: <line>[ (in function <name>)]".
    var describeFrame = function (frame) {
        var location = frame.file || frame.sourceURL;
        var suffix = frame.function ? ' (in function ' + frame.function + ')' : '';
        return ' -> ' + location + ': ' + frame.line + suffix;
    };

    var report = ['PHANTOM ERROR: ' + msg];
    var hasTrace = trace && trace.length;
    if (hasTrace) {
        report.push('TRACE:');
        trace.forEach(function (frame) {
            report.push(describeFrame(frame));
        });
    }
    console.error(report.join('\n'));
    phantom.exit(1);
};
//page.close()
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值