package com;
import com.gargoylesoftware.htmlunit.BrowserVersion;
import com.gargoylesoftware.htmlunit.WebClient;
import com.gargoylesoftware.htmlunit.html.DomElement;
import com.gargoylesoftware.htmlunit.html.DomNodeList;
import com.gargoylesoftware.htmlunit.html.HtmlElement;
import com.gargoylesoftware.htmlunit.html.HtmlListItem;
import com.gargoylesoftware.htmlunit.html.HtmlPage;
/**
 * Small HtmlUnit demo: fetches one web page and prints selected parts of its DOM
 * to standard output using three lookup styles (by id, by tag name, by XPath).
 */
public class Test {

    /** Address of the page that is fetched and inspected. */
    private static final String PAGE_URL = "http://www.xinhuanet.com/ent/dy.htm";

    /** Id of the {@code <ul>} element that is looked up by id and via XPath. */
    private static final String LIST_ID = "showData0";

    /** Separator line printed between the output sections. */
    private static final String SEPARATOR = "==================================";

    /**
     * Loads {@link #PAGE_URL} with a simulated Firefox 60 client and dumps, in order:
     * the element with id {@link #LIST_ID}, every {@code <a>} element of the page, and
     * the first {@code <li>} below that list.
     *
     * <p>Any failure (network, parsing, missing element) is reported as a stack trace on
     * standard error; the client is always closed.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // try-with-resources closes the client (and frees its windows/memory) on every path.
        try (WebClient webClient = new WebClient(BrowserVersion.FIREFOX_60)) {
            // Fetch and parse the page.
            HtmlPage page = webClient.getPage(PAGE_URL);

            // Look up a single element by its id.
            HtmlElement ul = page.getHtmlElementById(LIST_ID);
            System.out.println(ul.asXml());
            System.out.println("---------------------");
            System.out.println(SEPARATOR);

            // Look up all elements with a given tag name.
            DomNodeList<DomElement> aList = page.getElementsByTagName("a");
            for (DomElement dom : aList) {
                System.out.println(dom.asXml());
            }
            System.out.println(SEPARATOR);
            System.out.println(SEPARATOR);

            // Look up a specific element via XPath. getFirstByXPath returns null when
            // nothing matches, so an empty result is reported explicitly instead of
            // surfacing as an IndexOutOfBoundsException from get(0).
            HtmlListItem item = page.getFirstByXPath("//ul[@id='" + LIST_ID + "']/li[1]");
            if (item == null) {
                System.err.println("No <li> element found under ul#" + LIST_ID);
            } else {
                System.out.println(item.asXml());
            }
        } catch (Exception e) {
            // Top-level boundary of a demo program: report and exit normally.
            e.printStackTrace();
        }
    }
}
运行结果:
[ERROR] 2019-01-02 22:03:14,094 method:com.gargoylesoftware.htmlunit.javascript.StrictErrorReporter.runtimeError(StrictErrorReporter.java:82)
runtimeError: message=[An invalid or illegal selector was specified (selector: 'iframe:visible' error: Invalid selector: iframe:visible).] sourceName=[http://www.news.cn/static/jq.js] line=[3] lineSource=[null] lineOffset=[0]
[ERROR] 2019-01-02 22:03:14,320 method:com.gargoylesoftware.htmlunit.javascript.StrictErrorReporter.runtimeError(StrictErrorReporter.java:82)
runtimeError: message=[An invalid or illegal selector was specified (selector: '.lazyload:visible' error: Invalid selector: *.lazyload:visible).] sourceName=[http://www.news.cn/static/jq.js] line=[3] lineSource=[null] lineOffset=[0]
<ul id="showData0" class="dataList">
<li class="clearfix">
<h3>
<a href="http://www.xinhuanet.com/ent/2019-01/02/c_1123937681.htm" target="_blank">
《沉默的雪》发布终极海报
</a>
</h3>
<i class="imgs">
<a href="http://www.xinhuanet.com/ent/2019-01/02/c_1123937681.htm" targrt="_blank">
<img class="lazyload" src="http://ent.news.cn/titlepic/112393/1123937699_1546406807795_title0h.jpg" data-original="http://ent.news.cn/titlepic/112393/1123937699_1546406807795_title0h.jpg" style="display: block;"/>
</a>
</i>
<p class="summary">
</p>
<div class="info">
<div class="bdsharebuttonbox clearfix">
<span id="bdshare" class="bdshare_t bds_tools get-codes-bdshare feed-card-share" data="text:'《沉默的雪》发布终极海报',url:'http://www.xinhuanet.com/ent/2019-01/02/c_1123937681.htm',pic:'图片路径'">
<span class="bds_more">
分享
</span>
</span>
</div>
<span class="time">
2019-01-02 13:27:09
</span>
</div>
</li>
<li class="clearfix">
<h3>
<a href="http://www.xinhuanet.com/ent/2019-01/02/c_1123936379.htm" target="_blank">
《飞驰人生》全新预告
</a>
</h3>
<i class="imgs">
<a href="http://www.xinhuanet.com/ent/2019-01/02/c_1123936379.htm" targrt="_blank">
<img class="lazyload" src="http://ent.news.cn/titlepic/112393/1123936892_1546396816566_title0h.jpg" data-original="http://ent.news.cn/titlepic/112393/1123936892_1546396816566_title0h.jpg" style="display: block;"/>
</a>
</i>
<p class="summary">
</p>
<div class="info">
<div class="bdsharebuttonbox clearfix">
<span id="bdshare" class="bdshare_t bds_tools get-codes-bdshare feed-card-share" data="text:'《飞驰人生》全新预告',url:'http://www.xinhuanet.com/ent/2019-01/02/c_1123936379.htm',pic:'图片路径'">
<span class="bds_more">
分享
</span>
</span>
</div>
<span class="time">
2019-01-02 10:40:38
</span>
</div>
</li>
<li class="clearfix">
<h3>
<a href="http://www.xinhuanet.com/ent/2019-01/02/c_1123936210.htm" target="_blank">
史诗级长镜头VS现象级营销,谁赢?
</a>
</h3>
<i class="imgs">
<a href="http://www.xinhuanet.com/ent/2019-01/02/c_1123936210.htm" targrt="_blank">
<img class="lazyload" src="http://ent.news.cn/titlepic/112393/1123936210_1546393488329_title0h.jpg" data-original="http://ent.news.cn/titlepic/112393/1123936210_1546393488329_title0h.jpg" style="display: block;"/>
</a>
</i>
<p class="summary">
“首部预售即破亿元的文艺片”,仅此一条,《地球最后的夜晚》就足够在中国电影市场形成话题。更何况,这部号召大家具有仪式感地“一吻跨年”的影片,真真切切劈开了热爱与痛骂的阵营。
</p>
<div class="info">
<div class="bdsharebuttonbox clearfix">
<span id="bdshare" class="bdshare_t bds_tools get-codes-bdshare feed-card-share" data="text:'史诗级长镜头VS现象级营销,谁赢?',url:'http://www.xinhuanet.com/ent/2019-01/02/c_1123936210.htm',pic:'图片路径'">
<span class="bds_more">
分享
</span>
</span>
</div>
<span class="time">
2019-01-02 09:46:25
</span>
</div>
</li>
<li class="clearfix">
<h3>
<a href="http://www.xinhuanet.com/ent/2019-01/02/c_1123935506.htm" target="_blank">
电影《战斗民族养成记》新年海报曝光
</a>
</h3>
<i class="imgs">
<a href="http://www.xinhuanet.com/ent/2019-01/02/c_1123935506.htm" targrt="_blank">
<img class="lazyload" src="http://ent.news.cn/titlepic/112393/1123935506_1546390621436_title0h.jpg" data-original="http://ent.news.cn/titlepic/112393/1123935506_1546390621436_title0h.jpg" style="display: block;"/>
</a>
</i>
<p class="summary">
即将于1月18日全国上映的爱情喜剧电影《战斗民族养成记》发布元旦特别版海报。
</p>
<div class="info">
<div class="bdsharebuttonbox clearfix">
<span id="bdshare" class="bdshare_t bds_tools get-codes-bdshare feed-card-share" data="text:'电影《战斗民族养成记》新年海报曝光',url:'http://www.xinhuanet.com/ent/2019-01/02/c_1123935506.htm',pic:'图片路径'">
<span class="bds_more">
分享
</span>
</span>
</div>
<span class="time">
2019-01-02 08:58:13
</span>
</div>
</li>
<li class="clearfix">
<h3>
<a href="http://www.xinhuanet.com/ent/2019-01/02/c_1123934879.htm" target="_blank">
动物冒险片《一条狗的回家路》中国将映
</a>
</h3>
<i class="imgs">
<a href="http://www.xinhuanet.com/ent/2019-01/02/c_1123934879.htm" targrt="_blank">
<img class="lazyload" src="http://ent.news.cn/titlepic/112393/1123934879_1546388994994_title0h.jpg" data-original="http://ent.news.cn/titlepic/112393/1123934879_1546388994994_title0h.jpg" style="display: block;"/>
</a>
</i>
<p class="summary">
2017年3月,美国电影《一条狗的使命》在中国上映;近2年后,其姊妹篇《一条狗的回家路》将于1月18日再次登陆中国。
</p>
<div class="info">
<div class="bdsharebuttonbox clearfix">
<span id="bdshare" class="bdshare_t bds_tools get-codes-bdshare feed-card-share" data="text:'动物冒险片《一条狗的回家路》中国将映',url:'http://www.xinhuanet.com/ent/2019-01/02/c_1123934879.htm',pic:'图片路径'">
<span class="bds_more">
分享
</span>
</span>
</div>
<span class="time">
2019-01-02 08:30:09
</span>
</div>
</li>
<li class="clearfix">
<h3>
<a href="http://www.xinhuanet.com/ent/2019-01/02/c_1123935012.htm" target="_blank">
《廉政风云》预告获观众“正能量”好评
</a>
</h3>
<i class="imgs">
<a href="http://www.xinhuanet.com/ent/2019-01/02/c_1123935012.htm" targrt="_blank">
<img class="lazyload" src="http://ent.news.cn/titlepic/112393/1123935012_1546388831333_title0h.jpg" data-original="http://ent.news.cn/titlepic/112393/1123935012_1546388831333_title0h.jpg" style="display: block;"/>
</a>
</i>
<p class="summary">
《廉政风云》年前发布了“金牌出击”版预告。
</p>
<div class="info">
<div class="bdsharebuttonbox clearfix">
<span id="bdshare" class="bdshare_t bds_tools get-codes-bdshare feed-card-share" data="text:'《廉政风云》预告获观众“正能量”好评',url:'http://www.xinhuanet.com/ent/2019-01/02/c_1123935012.htm',pic:'图片路径'">
<span class="bds_more">
分享
</span>
</span>
</div>
<span class="time">
2019-01-02 08