/**
 * Fetches the page at the given URL with an HtmlUnit {@code WebClient}
 * (configured as {@code BrowserVersion.INTERNET_EXPLORER_8}) and returns
 * the loaded page serialized via {@code HtmlPage.asXml()}.
 *
 * <p>The client's windows are always closed before this method returns,
 * whether the fetch succeeds or fails.
 *
 * @param url the address of the page to load
 * @return the XML representation of the loaded page
 * @throws IllegalArgumentException if {@code url} is not a well-formed URL
 * @throws IllegalStateException if the server answers with a failing HTTP
 *         status code or an I/O error occurs while loading the page
 */
public String getPageAsXml(String url) {
    final WebClient webClient = new WebClient(
            BrowserVersion.INTERNET_EXPLORER_8);
    try {
        // NOTE(review): assumes the response is HTML; presumably a non-HTML
        // response would fail the assignment to HtmlPage - confirm if
        // non-HTML URLs are expected here.
        final HtmlPage page = webClient.getPage(url);
        return page.asXml();
    } catch (FailingHttpStatusCodeException e) {
        // Surface the failure with its cause instead of continuing with a
        // null page and failing later with an uninformative NPE.
        throw new IllegalStateException(
                "Server returned a failing HTTP status for " + url, e);
    } catch (MalformedURLException e) {
        throw new IllegalArgumentException("Malformed URL: " + url, e);
    } catch (IOException e) {
        throw new IllegalStateException("I/O error while loading " + url, e);
    } finally {
        // Release the client's windows on every path, including failures.
        webClient.closeAllWindows();
    }
}
使用 HtmlUnit 的优点是:它不仅可以抓取静态的页面内容,还可以抓取页面中由 JavaScript 执行后才显示的内容,即它会像浏览器一样解析并执行 JavaScript。