package html2txt;
import java.util.*;
import java.io.BufferedReader;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.text.SimpleDateFormat;
/**
* Formatting helpers for web display, such as converting HTML to plain text.
*
* @author Xuefeng
*/
public class WebFormatter {
/**
 * Command-line entry point: reads the hard-coded file {@code e:\a.html},
 * converts its content with {@link #html2text(String)} and prints the result
 * to standard output.
 *
 * <p>Lines are concatenated without any separator, because
 * {@link BufferedReader#readLine()} strips line terminators and none is added
 * back. If reading fails, the stack trace is printed and whatever was read so
 * far (possibly nothing) is still converted and printed.
 *
 * @param args command-line arguments; not used
 */
public static void main(String[] args) {
    StringBuilder htmlBuf = new StringBuilder();
    BufferedReader br = null;
    try {
        // NOTE(review): FileReader decodes with the platform default charset;
        // confirm that matches the encoding of the input file.
        br = new BufferedReader(new FileReader("e:\\a.html"));
        String line;
        // Read one line at a time until end of file.
        while ((line = br.readLine()) != null) {
            htmlBuf.append(line);
        }
    } catch (FileNotFoundException e) {
        e.printStackTrace();
    } catch (IOException e) {
        e.printStackTrace();
    } finally {
        // Always release the file handle, even when reading failed part-way.
        if (br != null) {
            try {
                br.close();
            } catch (IOException e) {
                e.printStackTrace();
            }
        }
    }
    String str = html2text(htmlBuf.toString());
    System.out.println(str);
}
/**
 * Converts an HTML string to text by repeatedly calling
 * {@code parse(data, start, previousIsPre)} and concatenating the text of
 * every token it returns.
 *
 * <p>After each token the read position advances by {@code token.getLength()}
 * and the token's {@code isPreTag()} value is passed to the next
 * {@code parse} call. Processing stops when {@code parse} returns
 * {@code null}.
 *
 * @param html the HTML source; may be {@code null}
 * @return the concatenated token text, or an empty string when {@code html}
 *         is {@code null} or empty
 */
public static String html2text(String html) {
    // Compare by content: the previous `html == ""` check compared references
    // and missed empty strings that are not the interned literal.
    if (html == null || html.isEmpty()) {
        return "";
    }
    // The buffer never leaves this method, so the unsynchronized
    // StringBuilder is sufficient.
    StringBuilder sb = new StringBuilder(html.length());
    char[] data = html.toCharArray();
    int start = 0;
    boolean previousIsPre = false;
    for (;;) {
        Token token = parse(data, start, previousIsPre);
        if (token == null) {
            break;
        }
        previousIsPre = token.isPreTag();
        sb.append(token.getText());
        // NOTE(review): assumes parse never returns a zero-length token;
        // otherwise this loop would not advance - confirm against parse.
        start += token.getLength();
    }
    return sb.toString();
}
private static Token pars