htmlparser是一个很实用的工具项目,但是现在已经不再更新了,下载地址:http://htmlparser.sourceforge.net/。
例子:解析网页中table内容的核心代码如下:
// Fetches a page over HTTP, finds every <table> tag, and stores one map per
// table row (column index -> trimmed plain text of the cell) in `list`.
URL url = new URL("http://"); // URL of the page to parse
HttpURLConnection connection = (HttpURLConnection) url.openConnection();
Parser parser = new Parser();
parser.setConnection(connection);
// NOTE(review): this re-applies the encoding the parser already reports;
// pass an explicit charset name instead if auto-detection is wrong — confirm.
parser.setEncoding(parser.getEncoding());
NodeFilter filter_tab = new TagNameFilter("table");
NodeList nodelist = parser.parse(filter_tab);
// `map` holds the row currently being built; after the loops it refers to
// the last row processed (or stays null when no row was found).
HashMap<Integer, String> map = null;
ArrayList<HashMap<Integer, String>> list = new ArrayList<HashMap<Integer, String>>();
if (nodelist != null) {
    for (int i = 0; i < nodelist.size(); i++) {
        TableTag table = (TableTag) nodelist.elementAt(i);
        TableRow[] rows = table.getRows();
        if (rows == null) {
            continue;
        }
        for (int j = 0; j < rows.length; j++) {
            map = new HashMap<Integer, String>();
            TableColumn[] cols = rows[j].getColumns();
            for (int k = 0; k < cols.length; k++) {
                // Debug output: raw HTML of the cell.
                System.out.println(cols[k].toHtml().trim());
                map.put(k, cols[k].toPlainTextString().trim());
            }
            // Keep the finished row; previously each row map was built and
            // then dropped, so `list` always stayed empty.
            list.add(map);
        }
    }
}