htmlparser中自定义节点：
/**
 * Custom composite tag definition for the HTML {@code <B>} element.
 *
 * <p>The three accessors below supply the tag-name tables that the
 * enclosing parser library consults for this tag type.
 */
public class BoldTag extends CompositeTag {

    /** The single tag name associated with this class. */
    private static final String[] TAG_NAMES = {"B"};

    /** Creates a bold tag with no additional state. */
    public BoldTag() {
        // Nothing to initialise beyond what the superclass does.
    }

    /**
     * Returns the tag names this class is associated with.
     *
     * @return the shared array containing {@code "B"}
     */
    public String[] getIds() {
        return TAG_NAMES;
    }

    /**
     * Returns the tag names reported as "enders" for this tag.
     * The very same array as {@link #getIds()} is returned, i.e. {@code "B"}.
     *
     * @return the shared array containing {@code "B"}
     */
    public String[] getEnders() {
        return TAG_NAMES;
    }

    /**
     * Returns the end-tag names reported as "enders" for this tag; there are none.
     *
     * @return a newly allocated empty array on every call
     */
    public String[] getEndTagEnders() {
        final String[] none = new String[0];
        return none;
    }
}
自定义的节点需要先注册到PrototypicalNodeFactory中，才可以正常使用：
// Usage fragment: parse an HTML string, register the custom BoldTag, then print
// every <b> node found. `html` is expected to be defined by the surrounding code.
Parser parser = Parser.createParser(html, "UTF-8");

// Fetch the parser's node factory; the cast assumes it is a PrototypicalNodeFactory.
PrototypicalNodeFactory factory = (PrototypicalNodeFactory) parser.getNodeFactory();
// Register the custom tag with the factory so the parser can produce BoldTag nodes.
factory.registerTag(new BoldTag());

NodeFilter filter = new TagNameFilter("b");
NodeList list = parser.extractAllNodesThatMatch(filter);
for (int i = 0; i < list.size(); i++) {
    Node node = list.elementAt(i);
    System.out.println("==tohmtl==" + node.toHtml());
    System.out.println("==class==" + node.getClass());

    // getChildren() can return null when the tag has no children (e.g. "<b></b>");
    // guard against that instead of failing with a NullPointerException.
    NodeList children = node.getChildren();
    System.out.println(children == null ? "" : children.asString());
}