/**
 * Batch tool that re-encodes the files located directly inside {@link #PATH} as GB2312.
 *
 * <p>Every regular file whose detected charset is not GB2312 (or whose charset could not be
 * detected) is read as UTF-8 and written, under a possibly filtered name, into a {@code gbk}
 * sub-directory next to the source file. Sub-directories of {@link #PATH} are not descended into.
 */
public class CharsetConvertor {
    /** Directory whose files are converted; {@link #main} fails if this is not a directory. */
    public static final String PATH = "";

    /**
     * Patterns removed from each output file name. Entries are passed to
     * {@link String#replaceAll(String, String)} and are therefore interpreted as regular
     * expressions.
     */
    public static final String[] FILTER_WORD = new String[] { };

    /**
     * Converts every non-GB2312 file directly inside {@link #PATH}.
     *
     * @param args ignored
     * @throws RuntimeException if {@link #PATH} is not a directory, its content cannot be listed,
     *         or the {@code gbk} output directory cannot be created
     */
    public static void main(String[] args) {
        cpdetector.io.CodepageDetectorProxy detector = cpdetector.io.CodepageDetectorProxy
                .getInstance();
        detector.add(cpdetector.io.JChardetFacade.getInstance());

        File dir = new File(PATH);
        if (!dir.isDirectory()) {
            throw new RuntimeException("指定的路径[" + dir.getAbsolutePath() + "]不正确.");
        }
        // listFiles() returns null (rather than throwing) when an I/O error occurs.
        File[] files = dir.listFiles();
        if (files == null) {
            throw new RuntimeException("无法列出目录[" + dir.getAbsolutePath() + "]中的文件.");
        }

        for (File file : files) {
            if (file.isDirectory()) {
                continue;
            }

            // Declared per file so that a failed detection never reuses the previous file's result.
            java.nio.charset.Charset charset = null;
            try {
                // toURI().toURL() escapes characters that are illegal in URLs; File.toURL() does not.
                charset = detector.detectCodepage(file.toURI().toURL());
            } catch (Exception ex) {
                System.err.println("获取文件[" + file.getAbsolutePath() + "]编码失败!");
                ex.printStackTrace();
            }
            if (charset != null && "GB2312".equals(charset.name())) {
                // Already in the target encoding: nothing to do.
                continue;
            }

            // Build the output location with File so the platform's own separator is used.
            File gbkDir = new File(file.getParent(), "gbk");
            if (!gbkDir.isDirectory() && !gbkDir.mkdir()) {
                throw new RuntimeException("创建gbk文件夹[" + gbkDir.getPath() + "]失败");
            }

            File target = new File(gbkDir, stripFilterWords(file.getName()));
            System.out.println(target.getPath());
            try {
                // NOTE(review): the source is always decoded as UTF-8, regardless of the charset
                // reported by the detector - confirm this is intended.
                convert(file.getAbsolutePath(), target.getPath(), "UTF-8", "GB2312");
            } catch (IOException e) {
                // UnsupportedEncodingException is an IOException, so one handler covers both.
                System.err.println("转换文件[" + file.getAbsolutePath() + "]失败!");
                e.printStackTrace();
            }
        }
    }

    /** Removes every match of each {@link #FILTER_WORD} pattern from the given file name. */
    private static String stripFilterWords(String name) {
        String result = name;
        for (String word : FILTER_WORD) {
            result = result.replaceAll(word, "");
        }
        return result;
    }

    /**
     * Copies {@code infile} to {@code outfile}, decoding with {@code fromCharset} and encoding
     * with {@code toCharset}.
     *
     * <p>Both streams are closed when the method returns or throws; this includes
     * {@code System.in} / {@code System.out} when they are used as fallbacks.
     *
     * @param infile
     *            path of the source file; {@code null} reads from {@code System.in}
     * @param outfile
     *            path of the output file; {@code null} writes to {@code System.out}
     * @param fromCharset
     *            name of the source encoding; {@code null} uses the {@code file.encoding}
     *            system property
     * @param toCharset
     *            name of the target encoding; {@code null} uses the {@code file.encoding}
     *            system property
     * @throws IOException
     *             if a file cannot be opened, read or written
     * @throws UnsupportedEncodingException
     *             if either charset name is not supported
     */
    public static void convert(String infile, String outfile, String fromCharset, String toCharset)
            throws IOException, UnsupportedEncodingException {
        // Use the default encoding if no encoding is specified.
        if (fromCharset == null) {
            fromCharset = System.getProperty("file.encoding");
        }
        if (toCharset == null) {
            toCharset = System.getProperty("file.encoding");
        }

        // The byte streams are declared as resources of their own so they are closed even when
        // constructing the reader or writer fails (e.g. on an unsupported charset name).
        try (InputStream in = (infile != null) ? new FileInputStream(infile) : System.in;
                OutputStream out = (outfile != null) ? new FileOutputStream(outfile) : System.out;
                Reader r = new BufferedReader(new InputStreamReader(in, fromCharset));
                Writer w = new BufferedWriter(new OutputStreamWriter(out, toCharset))) {
            // The InputStreamReader converts from the input encoding to Unicode and the
            // OutputStreamWriter converts from Unicode to the output encoding. Characters that
            // cannot be represented in the output encoding are output as '?'.
            char[] buffer = new char[4096];
            int len;
            while ((len = r.read(buffer)) != -1) {
                w.write(buffer, 0, len);
            }
            w.flush();
        }
    }
}
文件字符编码转换工具
最新推荐文章于 2024-08-05 15:29:58 发布