package IBMSpider;
import java.util.ArrayList;
import java.util.List;
import org.htmlparser.*;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
/**
 * Collects every hyperlink found on a web page using the HTMLParser library
 * and prints each one on its own line.
 */
public class testHtmlParser {

    /** Page that is scanned when no URL is supplied on the command line. */
    private static final String DEFAULT_URL = "http://www.cb.cityu.edu.hk/is/people/academic/";

    /**
     * Fetches a page, extracts the target of every link tag on it and writes
     * the links to standard output.
     *
     * @param args optional; {@code args[0]} overrides the page URL. With no
     *             arguments the built-in default page is scanned.
     */
    public static void main(String[] args) {
        String url = (args != null && args.length > 0) ? args[0] : DEFAULT_URL;
        try {
            for (String link : extractLinks(url)) {
                System.out.println(link);
            }
        } catch (ParserException e) {
            // Report which page failed before dumping the trace, so the
            // failure is diagnosable without reading the stack frames.
            System.err.println("Failed to extract links from " + url + ": " + e.getMessage());
            e.printStackTrace();
        }
    }

    /**
     * Parses the page at {@code url} and returns the link of every
     * {@link LinkTag} node, in the order the parser reports them.
     *
     * @param url address of the page to parse
     * @return the extracted links; empty when the page has no link tags
     * @throws ParserException if the parser cannot open or parse the page
     */
    private static List<String> extractLinks(String url) throws ParserException {
        NodeFilter linkFilter = new NodeClassFilter(LinkTag.class);
        Parser parser = new Parser();
        parser.setURL(url);
        // Kept from the original: re-applies the encoding the parser reports.
        // NOTE(review): this looks redundant — confirm before removing.
        parser.setEncoding(parser.getEncoding());

        NodeList matches = parser.extractAllNodesThatMatch(linkFilter);
        List<String> links = new ArrayList<String>(matches.size());
        for (int i = 0; i < matches.size(); i++) {
            // The filter only admits LinkTag nodes, so the cast is safe.
            LinkTag tag = (LinkTag) matches.elementAt(i);
            links.add(tag.extractLink());
        }
        return links;
    }
}
// Java LinkTag: getting all links in a web page
// (Source blog page footer: latest recommended article published 2022-06-22 14:22:12)