// Reposted from: http://topstar.blog.51cto.com/693408/14086
package htmlparser;
import java.util.HashMap;
import java.util.Map;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
/**
 * Small HTML Parser demo: parses one page, keeps only the link tags and prints each
 * of them on its own line as {@code <link content> ==>> <link target>}.
 *
 * <p>The page to scan may be passed as the first command-line argument; when it is
 * omitted (or blank) the demo falls back to its built-in default page.
 */
public class GetLinkTest
{
    /** Page that is scanned when no usable URL is supplied on the command line. */
    private static final String DEFAULT_URL = "http://www.google.cn";

    /**
     * Entry point of the demo.
     *
     * @param args optional; a non-blank {@code args[0]} replaces the default page as the
     *             resource string handed to the {@code Parser} constructor
     */
    public static void main(String[] args)
    {
        String url = resolveUrl(args);
        try
        {
            // Let the parser walk the page and keep only the <A> tags.
            Parser parser = new Parser(url);
            NodeList links = parser.extractAllNodesThatMatch(new NodeFilter()
            {
                // Called for every node; only link tags pass the filter.
                public boolean accept(Node node)
                {
                    return node instanceof LinkTag;
                }
            });
            printLinks(links);
        }
        catch (Exception e)
        {
            // Program boundary of a demo: report whatever went wrong and finish normally.
            e.printStackTrace();
        }
    }

    /**
     * Picks the page to scan.
     *
     * @param args the command-line arguments, may be {@code null} or empty
     * @return the trimmed first argument, or the default page when there is no
     *         non-blank first argument
     */
    private static String resolveUrl(String[] args)
    {
        if (args == null || args.length == 0 || args[0] == null)
        {
            return DEFAULT_URL;
        }
        String candidate = args[0].trim();
        if (candidate.length() == 0)
        {
            return DEFAULT_URL;
        }
        return candidate;
    }

    /**
     * Prints one line per link to standard output.
     *
     * @param links nodes that passed the link filter; every element is cast to
     *              {@code LinkTag}
     */
    private static void printLinks(NodeList links)
    {
        for (int i = 0; i < links.size(); i++)
        {
            LinkTag link = (LinkTag) links.elementAt(i);
            // The content is written before the target is extracted, so a failure while
            // extracting still leaves the partial line visible.
            System.out.print(link.getStringText() + " ==>> ");
            System.out.println(link.extractLink());
        }
    }
}