/**
 * Full-match pattern for {@code showclass.asp} URLs on www.yifujx.com whose
 * {@code id} and {@code pid} query parameters are each one or two digits.
 * The dots in the host name are escaped so they match only a literal '.'.
 */
private static final String regex = "http://www\\.yifujx\\.com/cn/showclass\\.asp\\?id=\\d{1,2}&pid=\\d{1,2}";
/**
 * Full-match pattern for {@code showProduct.asp} URLs on www.yifujx.com whose
 * {@code ID} query parameter is one or two digits.
 */
private static final String urlRegex = "http://www\\.yifujx\\.com/cn/showProduct\\.asp\\?ID=\\d{1,2}";
/**
 * Parses http://www.yifujx.com/cn/, collects every {@code A} tag found in the
 * page body (searching nested nodes as well), and prints each link that fully
 * matches {@code regex}, followed by a tab and the anchor's child HTML.
 *
 * @param args command-line arguments; not used
 * @throws Exception if fetching or parsing the page fails
 */
public static void main(String[] args) throws Exception {
    Parser parser = new Parser("http://www.yifujx.com/cn/");
    HtmlPage htmlPage = new HtmlPage(parser);
    parser.visitAllNodesWith(htmlPage);

    // The second argument (true) makes the extraction recurse into child nodes.
    NodeFilter anchorFilter = new TagNameFilter("A");
    NodeList anchors = htmlPage.getBody().extractAllNodesThatMatch(anchorFilter, true);

    // Compile once up front; String.matches would recompile the regex for every link.
    java.util.regex.Pattern linkPattern = java.util.regex.Pattern.compile(regex);

    int size = anchors.size();
    for (int i = 0; i < size; i++) {
        LinkTag linkTag = (LinkTag) anchors.elementAt(i);
        String link = linkTag.getLink();
        // NOTE(review): null guard is defensive, in case an anchor without an href yields null — confirm.
        if (link != null && linkPattern.matcher(link).matches()) {
            System.out.println(link + "\t" + linkTag.getChildrenHTML());
        }
    }
}