package com.pachong.filter;
import java.net.URL;
import java.net.URLConnection;
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.HasChildFilter;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.nodes.RemarkNode;
import org.htmlparser.util.NodeList;
/**
 * Demo taken from http://free0007.iteye.com/blog/1131163: downloads a web page
 * and prints the text of every node selected by an HTMLParser {@link NodeFilter}.
 *
 * <p>Despite the class name, the filter installed in {@link #main(String[])} is a
 * {@link NodeClassFilter} matching {@link RemarkNode} instances; replace it there
 * to experiment with other filters.
 *
 * <p>Created: 2016-12-06 15:43:35
 */
public class HasChildFilterTest {

    /** Address of the page that is fetched and parsed. */
    private static final String TARGET_URL = "http://www.baidu.com";

    /** Maximum time, in milliseconds, to wait while opening the connection. */
    private static final int CONNECT_TIMEOUT_MS = 10000;

    /** Maximum time, in milliseconds, to wait for data once connected. */
    private static final int READ_TIMEOUT_MS = 10000;

    /**
     * Fetches {@link #TARGET_URL}, runs the configured filter over the parsed
     * document and writes the text of each matching node to standard output.
     *
     * @param args command-line arguments; not used
     * @throws Exception if the URL is malformed, the connection fails or times
     *                   out, or the parser reports an error
     */
    public static void main(String[] args) throws Exception {
        URL u = new URL(TARGET_URL);
        URLConnection con = u.openConnection();
        // Without explicit timeouts an unresponsive host would block forever.
        // They must be set before the connection is handed to the parser.
        con.setConnectTimeout(CONNECT_TIMEOUT_MS);
        con.setReadTimeout(READ_TIMEOUT_MS);
        Parser p = new Parser(con);

        // Write the filter under test here.
        NodeFilter filter = new NodeClassFilter(RemarkNode.class);
        NodeList list = p.extractAllNodesThatMatch(filter);

        // Defensive null check retained from the original code.
        if (list != null) {
            for (int i = 0; i < list.size(); i++) {
                Node node = list.elementAt(i);
                System.out.println(node.getText());
            }
        }
    }
}
// Web-crawler study notes: filter
// Latest recommended article published 2024-02-07 08:40:19