// ------------- daqin.js (CasperJS script) -------------
// CasperJS script (http://casperjs.org/).
// Opens a page and echoes the HTML of its main container to stdout.
//
// Usage: casperjs daqin.js [url]

// Create exactly one Casper instance: calling create() again would replace
// this object and silently drop the page settings and user agent set here.
var casper = require('casper').create({
    pageSettings: {
        loadImages: false // do not load images, to reduce the number of requests
    }
});
var fs = require('fs'); // only used by the optional file dumps shown below
casper.userAgent('Mozilla/5.0 (Macintosh; Intel Mac OS X 10_7_5) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/30.0.1599.101 Safari/537.36');

var url = casper.cli.args;
console.log("casper.cli.args:",url);

// Page to fetch: the first positional command-line argument when one is
// given, otherwise the default page.
var DEFAULT_FETCH_URL = 'http://xian.qq.com/l/dqlv/lyzx/dqlyzx.htm';
var fetchUrl = casper.cli.has(0) ? String(casper.cli.get(0)) : DEFAULT_FETCH_URL;

casper.start(fetchUrl, function() {
    // Second argument `true` requests the outer HTML of the matched element.
    this.echo(this.getHTML('div .main', true));
    // Optional debugging dumps to a file instead of stdout:
    //fs.write("daqin.html",this.getHTML('div.main'),'w')
    //this.echo(this.getHTML('div.search_notes', true));
    //fs.write("1.html", this.getHTML('div.search_feed div.search_notes'), 'w')
});
function getTextContent(strRule, strMesg)
{
//给evaluate传入参数
var textinfo = casper.evaluate(function(rule) {
var valArr = '';
$(rule).each(function(index,item){
valArr = valArr + $(this).text() + ',';
});
return valArr.substring(0,valArr.length-1);
}, strRule);
casper.echo(strMesg);
require('utils').dump(textinfo.split(','));
return textinfo.split(',');
};
// Run the steps queued by casper.start() above.
casper.run();
// --------------------- Daqin.java ---------------------
/**
 * Fetches a page through the CasperJS script and prints the text of its
 * {@code li}, {@code a} and {@code span} elements using jsoup.
 */
public class Daqin {
    /** CasperJS launcher executable. */
    private static final String CASPERJS_EXE = "C:\\casperjs\\bin\\casperjs.exe";
    /** CasperJS script that is run with the target URL as its argument. */
    private static final String CASPERJS_SCRIPT = "C:\\casperjs\\tests\\daqin.js";
    /** Charset used to decode the script's standard output. */
    private static final String OUTPUT_CHARSET = "gbk";

    /**
     * Fetches the page, parses the captured HTML with jsoup and prints the
     * text of all {@code li}, then all {@code a}, then all {@code span} elements.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try {
            String content = getAjaxCotnent("http://xian.qq.com/l/dqlv/lyzx/dqlyzx.htm");
            Document doc = Jsoup.parse(content);
            printTexts(doc.getElementsByTag("li"));
            printTexts(doc.getElementsByTag("a"));
            printTexts(doc.getElementsByTag("span"));
        } catch (IOException e) {
            e.printStackTrace();
        }
    }

    /** Prints the text of each element on its own line. */
    private static void printTexts(Elements elements) {
        for (Element element : elements) {
            System.out.println(element.text());
        }
    }

    /**
     * Runs the CasperJS script for the given URL and returns what it wrote
     * to standard output.
     *
     * Each output line is trimmed and the lines are joined with a newline,
     * so tokens on adjacent lines are not fused together.
     *
     * @param url page address passed to the script as a single argument
     * @return the captured output, also echoed to stdout with a dashed prefix
     * @throws IOException if the process cannot be started, its output cannot
     *         be read, or the wait for it is interrupted
     */
    public static String getAjaxCotnent(String url) throws IOException {
        // An argument list (instead of one concatenated command string) keeps
        // the URL intact even if it contains whitespace.
        ProcessBuilder builder = new ProcessBuilder(CASPERJS_EXE, CASPERJS_SCRIPT, url);
        // Forward the child's stderr so an unread pipe cannot stall it.
        builder.redirectError(ProcessBuilder.Redirect.INHERIT);
        Process process = builder.start();

        StringBuilder output = new StringBuilder();
        try (BufferedReader reader = new BufferedReader(
                new InputStreamReader(process.getInputStream(), OUTPUT_CHARSET))) {
            String line;
            while ((line = reader.readLine()) != null) {
                output.append(line.trim()).append('\n');
            }
            int exitCode = process.waitFor();
            if (exitCode != 0) {
                // Report but do not fail: partial output may still be usable.
                System.err.println("casperjs exited with code " + exitCode);
            }
        } catch (InterruptedException e) {
            // Restore the interrupt flag and surface it through the declared exception type.
            Thread.currentThread().interrupt();
            throw new IOException("Interrupted while waiting for casperjs", e);
        } finally {
            process.destroy();
        }

        String content = output.toString();
        System.out.println("---------" + content);
        return content;
    }
}