Java 中判断文件是否为 UTF-8 编码:识别常见编码格式的文件并转换成 UTF-8 编码的 Java 实现

package com.buptsse.ate.utils;

import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.UnsupportedEncodingException;
import java.io.Writer;
import java.nio.charset.CharacterCodingException;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CodingErrorAction;
import java.nio.charset.StandardCharsets;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;

/**
 * Command-line utility that walks a directory tree, guesses the character
 * encoding of every matching file and rewrites the files that are not already
 * UTF-8 as UTF-8, in place.
 *
 * <p>Encoding detection is heuristic: a byte-order mark is honoured first,
 * then the file is checked for being well-formed UTF-8, and anything else is
 * assumed to be GBK.
 *
 * @author michaelw
 * @email michael.wang1028@gmail.com
 * @date 2012-09-03
 */
public class ConverEncoding {

    /** Target charset name; files detected as this charset are left untouched. */
    static String CODE = "UTF-8";

    /** Extension of the files to convert (for example ".css", ".js" or ".htm"). */
    static String FILE_SUFFIX = ".txt";

    /** Root directory that is scanned when none is given on the command line. */
    static String srcDir = "C:\\WorkTools\\weenCompany_ChineseEnglish_JT_V5.3.0_UTF8";

    /**
     * Converts every file below the source directory whose name ends with the
     * configured suffix to {@link #CODE}.
     *
     * @param args optional overrides: {@code args[0]} is the directory to scan
     *             (default {@link #srcDir}) and {@code args[1]} is the file
     *             suffix (default {@link #FILE_SUFFIX})
     * @throws Exception if a file cannot be read while detecting its encoding
     */
    public static void main(String[] args) throws Exception {
        String root = (args != null && args.length > 0) ? args[0] : srcDir;
        String suffix = (args != null && args.length > 1) ? args[1] : FILE_SUFFIX;

        // The suffix is literal text, not a regex: quote it so that "." only
        // matches a dot, and anchor it so that it only matches at the end of
        // the path. It is lower-cased because fetchFileList matches against
        // the lower-cased path.
        String suffixRegex = Pattern.quote(suffix.toLowerCase(Locale.ROOT)) + "$";

        List<String> files = new ArrayList<String>();
        fetchFileList(root, files, suffixRegex);

        for (String fileName : files) {
            String filecode = codeString(fileName);
            if (!CODE.equals(filecode)) {
                convert(fileName, filecode, fileName, CODE);
            }
        }
    }

    /**
     * Re-encodes a text file. The source is read completely before the target
     * is opened, so {@code oldFile} and {@code newFlie} may be the same path.
     *
     * <p>The file is copied line by line and every line, including the last
     * one, is terminated with the platform line separator. Missing parent
     * directories of the target are created.
     *
     * <p>I/O failures and unsupported charset names are reported on standard
     * error and otherwise ignored (best effort); in that case the target file
     * is not modified unless the failure happens while writing it.
     *
     * @param oldFile    path of the file to read
     * @param oldCharset charset name used to decode {@code oldFile}
     * @param newFlie    path of the file to write
     * @param newCharset charset name used to encode the output
     */
    public static void convert(String oldFile, String oldCharset,
            String newFlie, String newCharset) {
        System.out.println("the old file is :" + oldFile);
        System.out.println("The oldCharset is : " + oldCharset);

        try {
            StringBuilder content = new StringBuilder();
            String lineSeparator = System.getProperty("line.separator");
            try (FileInputStream raw = new FileInputStream(oldFile);
                 BufferedReader in = new BufferedReader(new InputStreamReader(raw, oldCharset))) {
                String line;
                while ((line = in.readLine()) != null) {
                    content.append(line).append(lineSeparator);
                }
            }

            // Encode before touching the target: an unsupported charset name
            // then fails here instead of after the target has been truncated.
            byte[] encoded = content.toString().getBytes(newCharset);

            // getParentFile() works for any separator and for bare file names,
            // unlike cutting the string at the last backslash.
            File parent = new File(newFlie).getAbsoluteFile().getParentFile();
            if (parent != null && !parent.exists()) {
                parent.mkdirs();
            }

            try (FileOutputStream out = new FileOutputStream(newFlie)) {
                out.write(encoded);
            }
        } catch (IOException e) {
            // Covers FileNotFoundException and UnsupportedEncodingException too.
            e.printStackTrace();
        }
    }

    /**
     * Recursively collects the files below {@code strPath} whose lower-cased
     * absolute path contains a match for {@code regex}.
     *
     * <p>Matching is done against the lower-cased path, so the expression
     * should be written in lower case. The paths added to {@code filelist}
     * keep their original case so that they stay valid on case-sensitive file
     * systems. A path that is not a readable directory contributes nothing.
     *
     * @param strPath  directory to scan
     * @param filelist list that receives the absolute paths of matching files
     * @param regex    regular expression searched for in each lower-cased path
     */
    public static void fetchFileList(String strPath, List<String> filelist,
            final String regex) {
        // Compile once for the whole tree instead of once per directory.
        collectMatching(new File(strPath), filelist, Pattern.compile(regex));
    }

    /** Depth-first walk behind {@link #fetchFileList(String, List, String)}. */
    private static void collectMatching(File dir, List<String> filelist, Pattern pattern) {
        File[] entries = dir.listFiles();
        if (entries == null) {
            return;
        }
        for (File entry : entries) {
            if (entry.isDirectory()) {
                collectMatching(entry, filelist, pattern);
                continue;
            }
            String path = entry.getAbsolutePath();
            // Locale.ROOT keeps the lower-casing independent of the default locale.
            if (pattern.matcher(path.toLowerCase(Locale.ROOT)).find()) {
                filelist.add(path);
            }
        }
    }

    /**
     * Guesses the character encoding of a file.
     *
     * <p>The first two bytes are checked for a byte-order mark:
     * {@code EF BB} yields "UTF-8", {@code FF FE} yields "Unicode" and
     * {@code FE FF} yields "UTF-16BE". Without a recognised mark the whole
     * file is decoded strictly as UTF-8; if that succeeds (which includes
     * empty and pure-ASCII files) the result is "UTF-8", otherwise "GBK" is
     * assumed.
     *
     * @param fileName path of the file to inspect
     * @return charset name guessed for the file
     * @throws Exception if the file cannot be opened or read (in practice an
     *                   {@link IOException})
     */
    public static String codeString(String fileName) throws Exception {
        int first;
        int second;
        try (BufferedInputStream bin = new BufferedInputStream(new FileInputStream(fileName))) {
            first = bin.read();
            second = bin.read();
        }

        // read() returns -1 at end of file; only combine two real bytes.
        if (first >= 0 && second >= 0) {
            switch ((first << 8) | second) {
            case 0xefbb:
                return "UTF-8";
            case 0xfffe:
                return "Unicode";
            case 0xfeff:
                return "UTF-16BE";
            default:
                break;
            }
        }

        // Files without a byte-order mark are not necessarily GBK: treating
        // well-formed UTF-8 as GBK would corrupt it on conversion.
        return isValidUtf8(fileName) ? "UTF-8" : "GBK";
    }

    /** Returns whether the whole file decodes as well-formed UTF-8. */
    private static boolean isValidUtf8(String fileName) throws IOException {
        CharsetDecoder strict = StandardCharsets.UTF_8.newDecoder()
                .onMalformedInput(CodingErrorAction.REPORT)
                .onUnmappableCharacter(CodingErrorAction.REPORT);
        try (FileInputStream raw = new FileInputStream(fileName);
             InputStreamReader in = new InputStreamReader(new BufferedInputStream(raw), strict)) {
            char[] sink = new char[8192];
            while (in.read(sink) != -1) {
                // Only the decoding result matters; the characters are dropped.
            }
            return true;
        } catch (CharacterCodingException e) {
            return false;
        }
    }

}

转自:http://www.cnblogs.com/DiYuShe/archive/2012/09/03/2668575.html

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值