1. 首先下载 HtmlUnit 的 jar 包(及其依赖),并将其导入到项目中。
package com.shu.htmlUnit;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
public class HtmlUnit01 {
public static void main(String[] args) throws Exception{
String url = "http://www.baidu.com";
String str;
//创建一个webClient,模拟浏览器
//WebClient webClient = new WebClient();
//使用FireFox读取网页
WebClient webClient = new WebClient(BrowserVersion.FIREFOX_38);
//使用Chrome读取网页
//WebClient webClient = new WebClient(BrowserVersion.CHROME);
//打开的话,就是执行javaScript/Css
webClient.getOptions().setJavaScriptEnabled(false);
webClient.getOptions().setCssEnabled(false);
//获取页面
HtmlPage page = webClient.getPage(url);
//获取页面的title
str = page.getTitleText();
System.out.println("Title:------"+str);
//获取页面的XML代码
str = page.asXml();
System.out.pri