/**
 * Reads the HTML file at {@code path}, tokenizes it with the HTMLParser
 * {@code Lexer}, and collects every node the lexer yields into a list.
 *
 * <p>The collected nodes are not processed further here; the per-node handling
 * is left as a TODO at the end of the method.
 *
 * @param path file-system path of the HTML file to tokenize; it is also passed
 *             to {@code cssgetread.cssgetread}
 * @throws ParserException if the lexer fails while producing nodes
 * @throws IOException     if the file cannot be opened or read
 */
public static void htmlpaser(String path) throws ParserException, IOException {
    // String-to-String map loaded for the same path (presumably style data for
    // the per-node processing below -- confirm against cssgetread). The call is
    // kept even though the map is not read yet, since it may have side effects.
    HashMap<String, String> cssmap = cssgetread.cssgetread(path);

    // NOTE(review): FileReader decodes with its default charset, while the page
    // below is labelled "GB2312" -- confirm that the two actually match.
    StringBuilder content = new StringBuilder();
    BufferedReader reader = new BufferedReader(new FileReader(new File(path)));
    try {
        String line;
        while ((line = reader.readLine()) != null) {
            // readLine() strips the line terminator; put a newline back so that
            // markup split across physical lines is not glued together.
            content.append(line).append('\n');
        }
    } finally {
        // Always release the file handle, including when reading fails.
        reader.close();
    }

    Lexer lexer = new Lexer(new Page(content.toString(), "GB2312"));
    lexer.setNodeFactory(new PrototypicalNodeFactory());

    // Drain the lexer; nextNode() returns null once the input is exhausted.
    List<Node> list = new ArrayList<Node>();
    Node node;
    while ((node = lexer.nextNode()) != null) {
        list.add(node);
    }
    // TODO: iterate over 'list' and read the text value of each node.
}
// Reposted from: https://blog.51cto.com/boysky/1040847