文件字符编码转换工具

最新推荐文章于 2024-08-05 15:29:58 发布

iteye_14294

最新推荐文章于 2024-08-05 15:29:58 发布

阅读量216

点赞数

分类专栏： Life 文章标签： UP

本文链接：https://blog.csdn.net/iteye_14294/article/details/81618505

版权

Life 专栏收录该内容

71 篇文章 0 订阅

订阅专栏


/**
 * Command-line utility that re-encodes every regular file directly inside {@link #PATH}
 * to GB2312 and writes the results into a {@code gbk} sub-directory of that directory.
 *
 * <p>The encoding of each file is guessed with the cpdetector library; files reported as
 * GB2312 are skipped, all others are converted.
 */
public class CharsetConvertor {
	/** Directory to scan. {@link #main(String[])} fails unless this names an existing directory. */
	public final static String PATH = "";
	/** Literal substrings removed from each source file name to form the output file name. */
	public final static String[] FILTER_WORD = new String[] {  };

	/** Encoding of the generated files; a file detected as this encoding is not converted. */
	private final static String TARGET_CHARSET = "GB2312";
	/** Source encoding assumed when detection fails or reports a charset this JVM cannot decode. */
	private final static String DEFAULT_SOURCE_CHARSET = "UTF-8";
	/** Name of the output sub-directory created inside the scanned directory. */
	private final static String OUTPUT_DIR_NAME = "gbk";

	/**
	 * Converts all regular files found directly in {@link #PATH}. Sub-directories are ignored.
	 * A failure to convert one file is reported on standard error and does not stop the run.
	 *
	 * @param args
	 *            ignored
	 * @throws RuntimeException
	 *             if {@link #PATH} is not a readable directory or the output directory
	 *             cannot be created
	 */
	public static void main(String[] args) {
		File dir = new File(PATH);
		if (!dir.isDirectory()) {
			throw new RuntimeException("指定的路径[" + dir.getAbsolutePath() + "]不正确.");
		}
		// listFiles() returns null (not an empty array) when the directory cannot be read.
		File[] files = dir.listFiles();
		if (files == null) {
			throw new RuntimeException("无法读取目录[" + dir.getAbsolutePath() + "]的内容.");
		}

		cpdetector.io.CodepageDetectorProxy detector = cpdetector.io.CodepageDetectorProxy
				.getInstance();
		detector.add(cpdetector.io.JChardetFacade.getInstance());

		// Built with File(parent, child) so the platform's own separator is used.
		File gbkDir = new File(dir, OUTPUT_DIR_NAME);
		for (File file : files) {
			if (file.isDirectory()) {
				continue;
			}

			// Detected per file: a failed detection must not inherit the previous file's result.
			java.nio.charset.Charset detected = detectCharset(detector, file);
			if (detected != null && TARGET_CHARSET.equals(detected.name())) {
				continue; // already in the target encoding
			}

			// The output directory is created lazily, only once something needs converting.
			if (!gbkDir.exists() && !gbkDir.mkdir()) {
				throw new RuntimeException("创建gbk文件夹[" + gbkDir.getPath() + "]失败");
			}

			File target = new File(gbkDir, outputName(file.getName()));
			System.out.println(target.getPath());

			try {
				convert(file.getAbsolutePath(), target.getPath(), sourceEncoding(detected),
						TARGET_CHARSET);
			} catch (IOException e) {
				// Also covers UnsupportedEncodingException, which is an IOException.
				System.err.println("转换文件[" + file.getAbsolutePath() + "]失败!");
				e.printStackTrace();
			}
		}
	}

	/**
	 * Asks the detector for the encoding of one file.
	 *
	 * @return the detector's answer, or {@code null} if detection threw
	 */
	private static java.nio.charset.Charset detectCharset(
			cpdetector.io.CodepageDetectorProxy detector, File file) {
		try {
			// File.toURL() is deprecated because it does not escape characters that are
			// illegal in URLs; going through toURI() does.
			return detector.detectCodepage(file.toURI().toURL());
		} catch (Exception ex) {
			// Best effort: the caller falls back to the default source encoding.
			System.err.println("获取文件[" + file.getAbsolutePath() + "]编码失败!");
			ex.printStackTrace();
			return null;
		}
	}

	/**
	 * Chooses the encoding used to decode a source file: the detected one when this JVM
	 * supports a charset of that name, otherwise {@link #DEFAULT_SOURCE_CHARSET}.
	 */
	private static String sourceEncoding(java.nio.charset.Charset detected) {
		if (detected != null && java.nio.charset.Charset.isSupported(detected.name())) {
			return detected.name();
		}
		return DEFAULT_SOURCE_CHARSET;
	}

	/** Removes every {@link #FILTER_WORD} entry from a file name, matching literally (no regex). */
	private static String outputName(String name) {
		String result = name;
		for (String word : FILTER_WORD) {
			result = result.replace(word, "");
		}
		return result;
	}

	/**
	 * Copies text from one file (or standard input) to another (or standard output),
	 * decoding with one charset and encoding with another. Characters that cannot be
	 * represented in the output encoding are replaced by the encoder's substitution
	 * character (typically '?').
	 *
	 * <p>Files opened by this method are always closed, even when the copy fails. The
	 * standard streams are only flushed, never closed.
	 *
	 * @param infile
	 *            path of the source file, or {@code null} to read standard input
	 * @param outfile
	 *            path of the output file, or {@code null} to write standard output
	 * @param fromCharset
	 *            encoding of the source, or {@code null} for the {@code file.encoding}
	 *            system property
	 * @param toCharset
	 *            encoding of the output, or {@code null} for the {@code file.encoding}
	 *            system property
	 * @throws IOException
	 *             if a file cannot be opened, read or written
	 * @throws UnsupportedEncodingException
	 *             if either charset name is not supported
	 */
	public static void convert(String infile, String outfile, String fromCharset, String toCharset)
			throws IOException, UnsupportedEncodingException {
		// Use default encoding if no encoding is specified.
		String sourceName = fromCharset != null ? fromCharset : System.getProperty("file.encoding");
		String targetName = toCharset != null ? toCharset : System.getProperty("file.encoding");

		// Only streams opened here are registered as resources; a null resource is
		// skipped by try-with-resources, which keeps System.in / System.out open.
		try (InputStream fileIn = infile != null ? new FileInputStream(infile) : null;
				OutputStream fileOut = outfile != null ? new FileOutputStream(outfile) : null) {
			InputStream in = fileIn != null ? fileIn : System.in;
			OutputStream out = fileOut != null ? fileOut : System.out;

			// The reader decodes the input bytes to Unicode; the writer encodes Unicode
			// to the output charset.
			Reader reader = new BufferedReader(new InputStreamReader(in, sourceName));
			Writer writer = new BufferedWriter(new OutputStreamWriter(out, targetName));

			char[] buffer = new char[4096];
			int len;
			while ((len = reader.read(buffer)) != -1) {
				writer.write(buffer, 0, len);
			}

			if (fileOut != null) {
				// Closing (not just flushing) lets the encoder emit any final bytes.
				writer.close();
			} else {
				writer.flush();
			}
		}
	}

}