// Straight to the code:
package util;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.InputStreamReader;
import java.net.HttpURLConnection;
import java.net.URL;
import org.htmlparser.Node;
import org.htmlparser.Parser;
import org.htmlparser.filters.TagNameFilter;
import org.htmlparser.util.NodeList;
/**
 * Small utility/demo around the HTML Parser library: reads a text file into a
 * string and, when run as a program, prints the plain text of the table cells
 * found on a fixed web page.
 */
public class HtmlLinkParser {

    /** Charset name used to decode files read by {@link #openFile(String)}. */
    private static final String ENCODE = "UTF-8";

    /** Page fetched and parsed by {@link #main(String[])}. */
    private static final String WATER_LIST_URL =
            "http://www.cjhdj.com.cn/CjhdjManage/water/allwaterList.jsp";

    /**
     * Reads a whole text file, decoded as UTF-8, into a single string.
     *
     * <p>Every line read is followed by a {@code "\n"} in the result, whatever
     * line terminator the file itself used.
     *
     * @param szFileName path of the file to read
     * @return the file content, or an empty string if the file cannot be
     *         opened or read (including when {@code szFileName} is null)
     */
    public static String openFile(String szFileName) {
        try {
            BufferedReader reader = new BufferedReader(
                    new InputStreamReader(new FileInputStream(new File(szFileName)), ENCODE));
            try {
                // StringBuilder avoids re-copying the accumulated text on every line.
                StringBuilder content = new StringBuilder();
                String line;
                while ((line = reader.readLine()) != null) {
                    content.append(line).append("\n");
                }
                return content.toString();
            } finally {
                // Close even when readLine() fails, so the file handle is not leaked.
                reader.close();
            }
        } catch (Exception ignored) {
            // Deliberate best-effort contract: any failure is reported as "".
            return "";
        }
    }

    /**
     * Fetches {@link #WATER_LIST_URL}, collects every {@code <tr>} element and
     * prints the plain text of each {@code <td>} inside each row, one cell per
     * line, on standard output. The first row is skipped (presumably a header
     * row; confirm against the page). Any failure is printed as
     * {@code "Exception:" + e} rather than propagated.
     *
     * @param args command-line arguments; not used
     */
    public static void main(String[] args) {
        try {
            Parser parser = new Parser(
                    (HttpURLConnection) (new URL(WATER_LIST_URL)).openConnection());
            NodeList trList = parser.extractAllNodesThatMatch(new TagNameFilter("tr"));
            // Start at 1: the first row is intentionally skipped.
            for (int i = 1; i < trList.size(); i++) {
                // Search only the current row's children. Searching the whole row
                // list here would print every cell of the page once per row.
                NodeList rowChildren = trList.elementAt(i).getChildren();
                if (rowChildren == null) {
                    continue; // a row without child nodes has no cells to print
                }
                NodeList tdList =
                        rowChildren.extractAllNodesThatMatch(new TagNameFilter("td"), true);
                for (int j = 0; j < tdList.size(); j++) {
                    Node td = tdList.elementAt(j);
                    System.out.println(td.toPlainTextString());
                }
            }
        } catch (Exception e) {
            System.out.println("Exception:" + e);
        }
    }
}