package text;
import java.io.BufferedInputStream;
import java.io.FileOutputStream;
import java.io.FileWriter;
import java.io.IOException;
import java.io.PrintWriter;
import java.net.URL;
import org.w3c.tidy.Tidy;
/**
 * Fetches an HTML document from a URL and writes it out as XML using JTidy.
 *
 * <p>Diagnostics produced by JTidy during parsing are written to a separate
 * error file rather than to the console.
 *
 * @author Admin
 */
public class HtmlToXml {
    /** Location of the HTML document to read. */
    private final String url;
    /** Path of the file that receives the converted output. */
    private final String outFileName;
    /** Path of the file that receives JTidy's diagnostic messages. */
    private final String errOutFileName;

    /**
     * Creates a converter for a single document.
     *
     * @param url            URL of the HTML document to convert
     * @param outFileName    path of the file to write the converted output to
     * @param errOutFileName path of the file to write JTidy diagnostics to
     */
    public HtmlToXml(String url, String outFileName, String errOutFileName) {
        this.url = url;
        this.outFileName = outFileName;
        this.errOutFileName = errOutFileName;
    }

    /**
     * Runs the conversion: opens the configured URL, parses it with JTidy in
     * XML-output mode, and writes the result to the output file.
     *
     * <p>Any {@link IOException} (malformed URL, unreachable host, unwritable
     * file, failure while closing a stream) is caught and its stack trace is
     * printed; it is not propagated to the caller. All streams opened here are
     * closed on both the success and the failure path.
     */
    public void cover() {
        Tidy tidy = new Tidy();
        tidy.setXmlOut(true);

        // The error file is opened first, as before, so it exists even when the
        // URL cannot be opened. Auto-flush is enabled on the writer.
        try (PrintWriter errWriter = new PrintWriter(new FileWriter(errOutFileName), true)) {
            // Route JTidy's diagnostics to the error file.
            tidy.setErrout(errWriter);

            URL u = new URL(url);
            // Resources are closed in reverse order (out, then in) when this
            // inner block exits, whether normally or through an exception.
            try (BufferedInputStream in = new BufferedInputStream(u.openStream());
                 FileOutputStream out = new FileOutputStream(outFileName)) {
                // No character encoding is set explicitly, so JTidy's default applies.
                tidy.parse(in, out);
            }
        } catch (IOException e) {
            // Best-effort: report the failure and return normally.
            e.printStackTrace();
        }
    }

    /**
     * Demo entry point: converts a fixed local test page using hard-coded
     * input URL, output file and error file names.
     *
     * @param args command-line arguments (not used)
     */
    public static void main(String[] args) {
        // Arguments: URL of the HTML file, output file name, error file name.
        System.out.println("文件开始运行...");
        HtmlToXml htmlToXml = new HtmlToXml(
                "http://localhost:8000/javaproject/text1.html",
                "d://temp//htmlToXml.html",
                "d://temp//error.txt");
        htmlToXml.cover();
        System.out.println("文件结束运行...");
    }
}