首先将源网页用UTF-8重新编码放到一个新的文件, 还要注意加上: tidy.setInputEncoding("UTF-8"); 才能正确显示 源代码如下: import java.net.URL; import java.util.logging.Level; import java.util.logging.Logger; import java.io.*; import org.w3c.tidy.Tidy; public class xml { private String url; private String outFileName; private String errOutFileName; public xml(String url, String outFileName, String errOutFileName) { this.url = url; this.outFileName = outFileName; this.errOutFileName = errOutFileName; } public void convert() { URL u; BufferedInputStream in; FileOutputStream out; Logger log = Logger.getLogger("convert"); try { u = new URL(url); //Create input and output streams in = new BufferedInputStream(u.openStream()); // 打开文件,转换为 UTF-8 编码 InputStreamReader isr = new InputStreamReader(in, "GB2312"); // 源文件编码为 gb2312 File tmpNewFile = File.createTempFile("GB2312",".html"); // 转换后的文件,设定编码为 utf-8 out = new FileOutputStream( tmpNewFile ); // 需要将文件转换为字符流 OutputStreamWriter osw = new OutputStreamWriter( out , "UTF-8"); // 指定目标编码为 utf-8 osw.write("<?xml version=/"1.0/" encoding=/"utf-8/"?>/n"); char[] buffer = new char[10240]; // 文件缓冲区 int len = 0; // 使用字符读取方式,循环读取源文件内容 while( (len = isr.read(buffer)) !=-1 ) // 转换后写入目标文件中 { osw.write( buffer, 0, len); } osw.close(); // 转换完成 isr.close(); out.close(); in.close(); if( log.isLoggable( Level.INFO)){ log.info("HTML 文档转 UTF-8 编码完成!"); } //设置tidy Tidy tidy = new Tidy(); // Set file for error messages tidy.setErrout(new PrintWriter(new FileWriter(errOutFileName), true)); // Tell Tidy to convert HTML to XML tidy.setXmlOut(true); tidy.setInputEncoding("UTF-8"); FileInputStream in0 = new FileInputStream( tmpNewFile ); FileOutputStream out0 = new FileOutputStream(outFileName); //Convert files tidy.parse(in0, out0); //Clean up in.close(); out.close(); tmpNewFile.delete(); // 删除临时文件 } catch (IOException e) { System.out.println(this.toString() + e.toString()); } } public static void main(String[] args) { /* * Parameters are: * URL of HTML file * Filename of output file * Filename of error file */ String u="http://www.baidu.com/"; String o="index.xml"; String e="error.xml"; xml t = new xml(u, o, e); t.convert(); System.out.println("OK!"); } }