说是原创，其实也参考了老师的 PDF 文档和 Java 书籍。刚开始学，权且写写，献丑啦，O(∩_∩)O哈哈~
/*ReadLink.java*/
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import org.htmlparser.util.ParserException;
/**
 * Command-line entry point that loads a saved HTML page from disk and hands
 * its text to {@code readLink1.getLink} for link extraction.
 */
public class ReadLink {

    /** Page that is read when no path is supplied on the command line. */
    private static final String DEFAULT_PAGE = "E:/MyeElipseProject/新浪首页.htm";

    /**
     * Reads an HTML file and passes its content to {@code readLink1.getLink}.
     *
     * <p>If the file cannot be opened, a message is written to standard error
     * and the method returns normally without calling the link extractor.
     *
     * @param args optional; when present, {@code args[0]} is the path of the
     *             HTML file to read instead of the built-in default
     * @throws IOException if reading or closing the file fails
     * @throws ParserException propagated from {@code readLink1.getLink}
     */
    public static void main(String[] args) throws IOException, ParserException {
        String path = (args != null && args.length > 0) ? args[0] : DEFAULT_PAGE;

        String html;
        try {
            html = readAll(path);
        } catch (FileNotFoundException e) {
            System.err.println("Cannot open HTML file: " + path + " (" + e.getMessage() + ")");
            return;
        }
        readLink1.getLink(html);
    }

    /** Reads the whole text file at {@code path}, keeping a newline after every line. */
    private static String readAll(String path) throws IOException {
        // NOTE(review): FileReader decodes with the JVM's default charset, while
        // readLink1 creates its parser with "utf-8" - confirm the file's real encoding.
        BufferedReader reader = new BufferedReader(new FileReader(path));
        try {
            StringBuilder content = new StringBuilder();
            String line;
            while ((line = reader.readLine()) != null) {
                // readLine() strips the line terminator; put one back so markup that
                // spans several lines is not glued together (e.g. "<a" + "href=...").
                content.append(line).append('\n');
            }
            return content.toString();
        } finally {
            // Closing the BufferedReader also closes the wrapped FileReader.
            reader.close();
        }
    }
}
/*readLink1.java*/
import org.htmlparser.Node;
import org.htmlparser.NodeFilter;
import org.htmlparser.Parser;
import org.htmlparser.filters.NodeClassFilter;
import org.htmlparser.filters.OrFilter;
import org.htmlparser.tags.LinkTag;
import org.htmlparser.util.NodeList;
import org.htmlparser.util.ParserException;
/**
 * Extracts hyperlinks from HTML text using the HTMLParser library.
 */
public class readLink1 {

    /**
     * Parses {@code result} as HTML and prints every link tag found in it to
     * standard output, one per line, as the link target, a tab character and
     * the link text.
     *
     * @param result the HTML markup to scan
     * @throws ParserException propagated from the HTMLParser calls
     */
    public static void getLink(String result) throws ParserException {
        Parser parser = Parser.createParser(result, "utf-8");

        // A single class filter is enough; wrapping it in an OrFilter with one
        // predicate selects exactly the same nodes.
        NodeFilter linkFilter = new NodeClassFilter(LinkTag.class);
        NodeList matches = parser.parse(linkFilter);

        for (Node node : matches.toNodeArray()) {
            // Print only genuine link tags, so a node of another type can never
            // cause the previous link to be reported a second time.
            if (!(node instanceof LinkTag)) {
                continue;
            }
            LinkTag linkTag = (LinkTag) node;
            System.out.println(linkTag.getLink() + "\t" + linkTag.getLinkText());
        }
    }
}