经测试,实际用时1秒左右
/**
 * Precompiled pattern for an e-mail-like token: a local part made of word
 * characters optionally joined by '-', '+' or '.', followed by '@' and a
 * domain made of word-character segments joined by '-' or '.' that contains
 * at least one '.'.
 */
private static final Pattern p = Pattern.compile("\\w+([-+.]\\w+)*@\\w+([-.]\\w+)*\\.\\w+([-.]\\w+)*");
/**
 * Collects every e-mail-like string found on a range of pages fetched from
 * {@code http://tieba.baidu.com/p/<id>?pn=<i>}, for {@code i} running from
 * {@code start} to {@code end} inclusive.
 *
 * <p>For each page, the nodes accepted by a {@code RegexFilter} built from the
 * same expression as {@code p} are scanned, and all matches in each node are
 * appended to the result in encounter order. Duplicates are not removed.
 *
 * <p>This is best effort: if a {@code ParserException} is raised while a page
 * is being fetched or traversed, processing stops and the addresses collected
 * up to that point are returned. The caller cannot distinguish a partial
 * result from a complete one.
 *
 * @param id    value inserted into the URL path after {@code /p/}
 * @param start first value used for the {@code pn} query parameter (inclusive)
 * @param end   last value used for the {@code pn} query parameter (inclusive);
 *              when smaller than {@code start} no page is fetched
 * @return a new, possibly empty list of matched strings; never {@code null}
 */
public static ArrayList<String> extractEmail(int id, int start, int end) {
    ArrayList<String> list = new ArrayList<String>();
    // Build the node filter from the compiled pattern's own source so the two
    // expressions cannot drift apart; the filter is page-independent, so it is
    // created once instead of once per page.
    NodeFilter filter = new RegexFilter(p.pattern());
    try {
        for (int i = start; i <= end; i++) {
            Parser parser = new Parser("http://tieba.baidu.com/p/" + id + "?pn=" + i);
            NodeList nodes = parser.extractAllNodesThatMatch(filter);
            for (NodeIterator ni = nodes.elements(); ni.hasMoreNodes();) {
                Matcher m = p.matcher(ni.nextNode().toHtml());
                // A single node may contain several addresses; collect them all,
                // not just the first one.
                while (m.find()) {
                    list.add(m.group());
                }
            }
        }
    } catch (ParserException ignored) {
        // Deliberate best effort: the signature cannot report a checked
        // exception, so stop here and return whatever was collected so far.
    }
    return list;
}