Java HtmlPage.getByXPath 方法代码示例

import com.gargoylesoftware.htmlunit.html.HtmlPage; //导入方法依赖的package包/类

/**
 * Loads {@code page} with HtmlUnit and collects the usernames matched by the XPath pattern
 * registered for {@code host}.
 *
 * <p>The client runs with JavaScript disabled and is configured not to throw on failing HTTP
 * status codes or script errors. If the page cannot be loaded, the stack trace is printed to
 * {@code System.out} and the persona is returned without any usernames added.
 *
 * <p>NOTE(review): {@code page}, {@code patterns}, {@code initPatterns()} and
 * {@code isUserLink(...)} are not declared in this method; presumably they are members of the
 * enclosing class - confirm.
 *
 * @param host key used to look up the XPath pattern in {@code patterns}; also stored on the
 *     returned persona as its host pattern key
 * @return a new persona whose page id is the URI of {@code page}; no usernames are added when
 *     the page fails to load or no pattern is registered for {@code host}
 * @throws FailingHttpStatusCodeException declared for callers; see HtmlUnit's
 *     {@code WebClient.getPage}
 * @throws MalformedURLException declared for callers
 * @throws IOException declared for callers
 */
public Persona obtainPersonas(String host)
    throws FailingHttpStatusCodeException, MalformedURLException, IOException {
  if (this.patterns == null || this.patterns.isEmpty()) {
    initPatterns();
  }

  Persona persona = new Persona();
  persona.setHostPatternKey(host);
  persona.setPageId(page.toURI().toString());

  // Without a pattern there is nothing to extract; bail out before opening a client instead of
  // failing with a NullPointerException further down.
  String pattern = patterns.get(host);
  if (pattern == null) {
    return persona;
  }

  WebClient webClient = new WebClient();
  try {
    webClient.getOptions().setJavaScriptEnabled(false);
    webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
    webClient.getOptions().setThrowExceptionOnScriptError(false);

    HtmlPage htmlPage;
    try {
      htmlPage = webClient.getPage(page.toURL());
    } catch (Exception e) {
      // Best effort: a page that cannot be fetched yields a persona with no usernames added.
      e.printStackTrace(System.out);
      return persona;
    }

    // Patterns mentioning @href are treated as selecting anchors whose link carries the username.
    boolean isAnchor = pattern.contains("@href");

    List<?> elements = htmlPage.getByXPath(pattern);
    for (Object element : elements) {
      String username = null;
      if (isAnchor) {
        String link = ((HtmlAnchor) element).getHrefAttribute();
        if (isUserLink(link)) {
          // The username is taken as the last path segment of the link.
          username = link.substring(link.lastIndexOf('/') + 1);
        }
      } else if (element instanceof String) {
        username = ((String) element).trim();
      } else {
        username = ((DomNode) element).asText();
      }

      if (username != null && !username.isEmpty()) {
        persona.getUsernames().add(username);
      }
    }
    return persona;
  } finally {
    // Release the client on every exit path, including unexpected runtime exceptions.
    webClient.close();
  }
}

  • 0
    点赞
  • 0
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值