FileWriter 和 FileReader 确实使用系统当前默认的编码方式。
在 Java 11 之前,FileWriter 和 FileReader 都不支持通过参数指定编码方式(Java 11 起新增了接受 Charset 参数的构造方法),而 OutputStreamWriter 和 InputStreamReader 一直可以。这两个类从名字上就可以看出是字节流和字符流的组合,实际上也是连接两者的桥梁。
/**
 * Writes {@code str} to the file {@code fileName} encoded as UTF-8, replacing
 * any existing content. The file is created if it does not exist yet.
 *
 * @param str      text to write
 * @param fileName path of the target file
 * @throws IOException if the file cannot be created, opened or written
 */
public static void writeToFileUTF8(String str, String fileName) throws IOException {
    File f = new File(fileName);
    // FileOutputStream creates a missing file on its own, so no separate
    // exists()/createNewFile() step (and its check-then-act race) is needed.
    // try-with-resources flushes and closes the writer even if write() throws.
    try (BufferedWriter writer = new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream(f), "UTF-8"))) {
        writer.write(str);
    }
}
/**
 * Appends {@code str} to the end of the file {@code fileName} encoded as UTF-8.
 * The file is created if it does not exist yet.
 *
 * @param str      text to append
 * @param fileName path of the target file
 * @throws IOException if the file cannot be created, opened or written
 */
public static void appendToFileUTF8(String str, String fileName) throws IOException {
    File f = new File(fileName);
    // Opening the stream in append mode (second argument true) creates a
    // missing file, so no separate exists()/createNewFile() step is needed.
    // try-with-resources flushes and closes the writer even if write() throws.
    try (BufferedWriter writer = new BufferedWriter(
            new OutputStreamWriter(new FileOutputStream(f, true), "UTF-8"))) {
        writer.write(str);
    }
}
/**
 * Reads {@code fileName} line by line, decoding it with the charset name
 * returned by {@code getFileCharacterEncode(fileName)}, and returns the
 * collected content as a map.
 *
 * <p>The text accumulated in the local buffer is stored under the key
 * {@code "其他"}. The per-line processing inside the loop is not shown here.
 *
 * @param fileName path of the file to read
 * @return map holding the content that was read
 * @throws Exception if the file cannot be opened, decoded or read; the
 *                   underlying {@link IOException} is attached as the cause
 */
private static Map<String,String> readFile(String fileName) throws Exception{
    File file = new File(fileName);
    Map<String,String> contentMap = new HashMap<String, String>();
    String enCode = getFileCharacterEncode(fileName);
    // The raw stream is its own resource so it is still closed when the
    // InputStreamReader constructor rejects the charset name.
    try (FileInputStream in = new FileInputStream(file);
         BufferedReader reader = new BufferedReader(new InputStreamReader(in, enCode))) {
        // Some decoders (e.g. UTF-8, UTF-16BE) hand a leading byte-order mark
        // through as U+FEFF; drop it so it does not end up in the first line.
        reader.mark(1);
        if (reader.read() != 0xFEFF) {
            reader.reset();
        }
        String tempString = null;
        StringBuffer sb = new StringBuffer();
        while ((tempString = reader.readLine()) != null) {
            //...
        }
        contentMap.put("其他", sb.toString());
    } catch (IOException e) {
        log.error("read file error , file name is : " + fileName,e);
        // Keep the message and the cause so callers can see what actually failed.
        throw new Exception("read file error , file name is : " + fileName, e);
    }
    return contentMap;
}
/**
 * Guesses the charset of a file from its leading byte-order mark (BOM).
 *
 * <ul>
 *   <li>{@code EF BB BF} gives {@code "UTF-8"}</li>
 *   <li>{@code FF FE} gives {@code "Unicode"}</li>
 *   <li>{@code FE FF} gives {@code "UTF-16BE"}</li>
 *   <li>anything else, including an empty, too short or unreadable file,
 *       gives {@code "GBK"}</li>
 * </ul>
 *
 * @param fileName path of the file to inspect
 * @return the charset name chosen for the file; never {@code null}
 */
private static String getFileCharacterEncode(String fileName) {
    // Default used when no BOM is recognised or the file cannot be read.
    String code = "GBK";
    // try-with-resources releases the file handle once the first bytes are read.
    try (BufferedInputStream bis = new BufferedInputStream(new FileInputStream(fileName))) {
        // read() returns -1 at end of file; the bytes are compared one by one
        // so that -1 can never combine with a real byte into a fake BOM value.
        int b0 = bis.read();
        int b1 = bis.read();
        int b2 = bis.read();
        if (b0 == 0xEF && b1 == 0xBB && b2 == 0xBF) {
            code = "UTF-8";
        } else if (b0 == 0xFF && b1 == 0xFE) {
            // FF FE is the little-endian UTF-16 BOM; the name "Unicode" is
            // kept unchanged because callers receive this exact string.
            code = "Unicode";
        } else if (b0 == 0xFE && b1 == 0xFF) {
            code = "UTF-16BE";
        }
    } catch (IOException e) {
        // Best effort: log the failure and fall back to the default charset.
        log.error("get file encode error, file name is : " + fileName , e);
    }
    log.info("file encode is : " + code);
    return code;
}