1.用到了htmlunit的jar包
2.可用于Js传值网页的抓取
3.速度比较慢,求牛人给个优化建议,提高查询速度
1.[代码]用到了htmlunit的jar包
package org.sjcx.tool;
import com.gargoylesoftware.htmlunit.NicelyResynchronizingAjaxController;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
/**
 * Demo that loads a JavaScript-driven express-tracking page with HtmlUnit and
 * prints a fragment of the rendered markup to standard output.
 */
public class WebClientTest {

    /** Base URL of the tracking query; the waybill number is appended to it. */
    private static final String QUERY_URL = "http://q1.sto.cn/chaxun/result?express_no=";

    /** Waybill number to look up; STO waybill numbers are long, so it is kept as a String. */
    private static final String WAYBILL_NO = "868120614072";

    /** Upper bound, in milliseconds, on the wait for the page's background JavaScript. */
    private static final long JS_WAIT_MILLIS = 1000L;

    // NOTE(review): both markers are empty strings in the original source, so the
    // extracted fragment is always empty. They look like they were lost (possibly
    // HTML tags stripped on the way) - fill in the real delimiters before relying
    // on the output.
    private static final String START_MARKER = "";
    private static final String END_MARKER = "";

    /**
     * Opens the tracking page, waits for its scripts to finish, and prints the part
     * of the page XML that lies between {@link #START_MARKER} and {@link #END_MARKER}.
     *
     * <p>Failures are reported on standard error and are not propagated, so callers
     * never see an exception from this method.
     */
    public void getElements() {
        // try-with-resources closes the client even when loading fails.
        // NOTE(review): requires a HtmlUnit version in which WebClient is
        // AutoCloseable (2.16+ as far as I know); on older versions call
        // closeAllWindows() in a finally block instead.
        try (WebClient webClient = new WebClient()) {
            // Configure the client: run scripts, skip CSS, tolerate script errors.
            webClient.getOptions().setJavaScriptEnabled(true);
            webClient.getOptions().setCssEnabled(false);
            webClient.setAjaxController(new NicelyResynchronizingAjaxController());
            webClient.getOptions().setThrowExceptionOnScriptError(false);

            // Simulate a browser opening the target URL.
            HtmlPage rootPage = webClient.getPage(QUERY_URL + WAYBILL_NO);

            System.out.println("为了获取js执行的数据 线程开始沉睡等待");
            // Wait only as long as scripts are still running (at most JS_WAIT_MILLIS)
            // instead of always sleeping for the full interval.
            webClient.waitForBackgroundJavaScript(JS_WAIT_MILLIS);
            System.out.println("线程结束沉睡");

            // Page as XML; rootPage.asText() would give the plain text instead.
            String html = rootPage.asXml();
            System.out.println(extractBetween(html, START_MARKER, END_MARKER));
        } catch (Exception e) {
            // Report instead of silently swallowing, but keep the no-throw contract.
            System.err.println("Failed to fetch tracking page " + QUERY_URL + WAYBILL_NO + ": " + e);
            e.printStackTrace();
        }
    }

    /**
     * Returns the part of {@code html} that starts at {@code startMarker} (inclusive)
     * and ends just before the first {@code endMarker} found after it.
     *
     * <p>A marker that is not found widens the range to the corresponding document
     * boundary, so a missing marker never causes an index error.
     */
    private static String extractBetween(String html, String startMarker, String endMarker) {
        int start = html.indexOf(startMarker);
        if (start < 0) {
            start = 0;
        }
        // Search for the end marker only after the start marker so end >= start.
        int searchFrom = Math.min(html.length(), start + startMarker.length());
        int end = html.indexOf(endMarker, searchFrom);
        if (end < 0) {
            end = html.length();
        }
        return html.substring(start, end);
    }

    /** Runs the demo once. */
    public static void main(String[] args) throws Exception {
        new WebClientTest().getElements();
    }
}