import com.gargoylesoftware.htmlunit.html.HtmlPage; //导入方法依赖的package包/类
/**
 * Loads the document referenced by {@code page} and collects the usernames selected by the
 * XPath pattern registered for {@code host}.
 *
 * <p>The returned persona always carries {@code host} as its host pattern key and the URI of
 * {@code page} as its page id. When the page cannot be loaded, or when no XPath pattern is
 * registered for {@code host}, the persona is returned without any usernames added.
 *
 * <p>NOTE(review): {@code page}, {@code patterns}, {@code initPatterns()} and
 * {@code isUserLink(String)} are defined outside this method; their exact contracts are not
 * visible here.
 *
 * @param host key used to look up the XPath pattern in {@code patterns}; also stored on the
 *             returned persona
 * @return the populated persona, never {@code null}
 * @throws FailingHttpStatusCodeException declared for callers; failures raised while loading
 *         the page are caught and reported inside this method
 * @throws MalformedURLException declared for callers (see above)
 * @throws IOException declared for callers (see above)
 */
public Persona obtainPersonas(String host)
        throws FailingHttpStatusCodeException, MalformedURLException,
        IOException {
    // Lazily build the host -> XPath table on first use.
    if (this.patterns == null || this.patterns.isEmpty()) {
        initPatterns();
    }

    Persona persona = new Persona();
    persona.setHostPatternKey(host);
    persona.setPageId(page.toURI().toString());

    WebClient webClient = new WebClient();
    try {
        webClient.getOptions().setJavaScriptEnabled(false);
        webClient.getOptions().setThrowExceptionOnFailingStatusCode(false);
        webClient.getOptions().setThrowExceptionOnScriptError(false);

        HtmlPage htmlPage;
        try {
            htmlPage = webClient.getPage(page.toURL());
        } catch (Exception e) {
            // Best effort: report the failure and hand back the persona without usernames.
            e.printStackTrace(System.out);
            return persona;
        }

        String pattern = patterns.get(host);
        if (pattern == null) {
            // No XPath registered for this host, so there is nothing to extract.
            return persona;
        }

        // Patterns mentioning @href are treated as selecting anchors whose link ends in the
        // username; every other pattern is treated as selecting the username text itself.
        boolean isAnchor = pattern.contains("@href");

        List<?> elements = htmlPage.getByXPath(pattern);
        for (Object element : elements) {
            String username = null;
            if (isAnchor) {
                // NOTE(review): assumes the XPath yields HtmlAnchor nodes — confirm patterns.
                String link = ((HtmlAnchor) element).getHrefAttribute();
                if (isUserLink(link)) {
                    // The username is whatever follows the last '/' in the link.
                    username = link.substring(link.lastIndexOf('/') + 1);
                }
            } else if (element instanceof String) {
                username = ((String) element).trim();
            } else {
                username = ((DomNode) element).asText();
            }
            if (username != null && !username.isEmpty()) {
                persona.getUsernames().add(username);
            }
        }
        return persona;
    } finally {
        // Always release the client, including when extraction throws unexpectedly.
        webClient.close();
    }
}