HTML page (the sample document parsed by the Java code below):
<!-- Sample page: two tables (table1, table2) whose cells each carry a unique class attribute (a1-a12, b1-b12). -->
<html>
<head>
  <title></title>
</head>
<body>
  <table id="table1">
    <tr>
      <td class="a1">AAA</td>
      <td class="a2">BBB</td>
      <td class="a3">CCC</td>
    </tr>
    <tr>
      <td class="a4">DDD</td>
      <td class="a5">EEE</td>
      <td class="a6">FFF</td>
    </tr>
    <tr>
      <td class="a7">GGG</td>
      <td class="a8">HHH</td>
      <td class="a9">III</td>
    </tr>
    <tr>
      <td class="a10">JJJ</td>
      <td class="a11">KKK</td>
      <td class="a12">LLL</td>
    </tr>
  </table>
  <table id="table2">
    <tr>
      <td class="b1">MMM</td>
      <td class="b2">NNN</td>
      <td class="b3">OOO</td>
    </tr>
    <tr>
      <td class="b4">PPP</td>
      <td class="b5">QQQ</td>
      <td class="b6">RRR</td>
    </tr>
    <tr>
      <td class="b7">SSS</td>
      <td class="b8">TTT</td>
      <td class="b9">UUU</td>
    </tr>
    <tr>
      <td class="b10">VVV</td>
      <td class="b11">WWW</td>
      <td class="b12">XXX</td>
    </tr>
  </table>
</body>
</html>
/**
 * Demonstrates extracting the text of a table cell from an HTML page with the
 * htmlparser library.
 */
public class FilterExample {

    /**
     * Parses the page at {@code url} and prints the trimmed plain text of the first
     * {@code <td>} element whose {@code class} attribute equals {@code "b1"}.
     * Nothing is printed when no such element is found. A {@code ParserException}
     * is caught here and its stack trace is printed.
     *
     * @param url location of the page; handed unchanged to the {@code Parser} constructor
     */
    private static void testParser(String url) {
        try {
            // Create the parser, giving it the page location as its argument.
            Parser parser = new Parser(url);
            // Set the page encoding; the page requested here is UTF-8 encoded.
            parser.setEncoding("UTF-8");

            // Approach 1 (filter): pick the wanted data by the tag's class or id attribute.
            // class="b1" is unique in the sample page, so this filter leads straight to MMM.
            NodeFilter tdTag = new TagNameFilter("td");
            NodeFilter classB1 = new HasAttributeFilter("class", "b1");
            NodeFilter cellFilter = new AndFilter(tdTag, classB1);

            NodeList matches = parser.extractAllNodesThatMatch(cellFilter);
            boolean found = matches != null && matches.size() > 0;
            if (found) {
                // Only the first match is used.
                System.out.println(matches.elementAt(0).toPlainTextString().trim());
            }
            parser.reset();

            // Approach 2 (node navigation): walk down from the table tag instead.
            // According to the original author, this also prints MMM.
            // NOTE(review): elementAt(0).getNextSibling() presumably skips a leading
            // whitespace text node - confirm against the parsed tree.
            // NodeFilter tableFilter = new TagNameFilter("table");
            // NodeList nodeList = parser.extractAllNodesThatMatch(tableFilter);
            // TableTag nodeTable = (TableTag)nodeList.elementAt(1); // node of the second table
            // Node nodeTr = nodeTable.getChildren().elementAt(0).getNextSibling(); // first child node under the table
            // Node nodeTd = nodeTr.getChildren().elementAt(0).getNextSibling(); // first child node under the tr
            // String name = nodeTd.toPlainTextString().trim(); // plain-text content
            // System.out.println(name);
            // parser.reset();
        } catch (ParserException ex) {
            ex.printStackTrace();
        }
    }

    /**
     * Runs the example against a fixed local HTML file.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        testParser("E:/HttpClient/Noname1.html");
    }
}