Java处理UTF-8带BOM的文本的读写

什么是BOM

BOM(byte-order mark),即字节顺序标记,它是插入到以UTF-8、UTF-16或UTF-32编码的Unicode文件开头的特殊标记,用来识别Unicode文件的编码类型和字节顺序(big-endian或little-endian)。对于UTF-8来说,BOM并不是必须的:UTF-8以单个字节为编码单元,不存在字节顺序问题,BOM在这里只起到标识编码类型的作用。

BOMs 文件头:
00 00 FE FF = UTF-32, big-endian
FF FE 00 00 = UTF-32, little-endian
EF BB BF = UTF-8,
FE FF = UTF-16, big-endian
FF FE = UTF-16, little-endian



下面举个例子,针对UTF-8的文件BOM做个处理:

String xmla = StringFileToolkit.file2String(new File(“D:\\projects\\mailpost\\src\\a.xml”),“UTF-8”);

byte[] b = xmla.getBytes(“UTF-8”);

String xml = new String(b,3,b.length-3,“UTF-8”);

..............

思路是:先按照UTF-8编码读取文件后,跳过开头的三个字节(即UTF-8的BOM:EF BB BF),重新构建一个新的字符串,然后用Dom4j解析处理,这样就不会报错了。

其他编码的方式处理思路类似,其实可以写一个通用的自动识别的BOM的工具,去掉BOM信息,返回字符串。

不过这个处理过程已经有牛人解决过了:http://koti.mbnet.fi/akini/java/unicodereader/

‍Example code using UnicodeReader class
Here is an example method that reads a text file. It recognizes the BOM and skips it while reading.

// Requires UnicodeReader: http://koti.mbnet.fi/akini/java/unicodereader/UnicodeReader.java.txt
/**
 * Reads an entire text file into a char array.
 *
 * <p>The file is decoded through {@code UnicodeReader}, which (per its author's
 * description) recognizes a BOM and skips it; {@code null} is passed as the
 * default encoding so the system default is used when no BOM is found.
 *
 * @param file path of the file to read
 * @return the decoded file content
 * @throws IOException if the file cannot be opened, read or closed
 */
public static char[] loadFile(String file) throws IOException {
    char[] buffer = new char[16 * 1024]; // 16k buffer

    // try-with-resources closes every resource, in reverse order, even when
    // reading fails or an earlier close() throws.
    try (FileInputStream in = new FileInputStream(file);
         BufferedReader reader = new BufferedReader(new UnicodeReader(in, null));
         CharArrayWriter writer = new CharArrayWriter()) {
        int read;
        while ((read = reader.read(buffer)) != -1) {
            writer.write(buffer, 0, read);
        }
        return writer.toCharArray();
    }
}


Example code to write UTF-8 with bom marker
Write the BOM bytes at the start of an empty file so that text editors can detect the correct charset when reading it. Java's OutputStreamWriter does not write the UTF-8 BOM bytes by itself.


/**
 * Writes text to a file as UTF-8, putting a UTF-8 BOM (EF BB BF) in front when
 * the file is empty at the time of writing.
 *
 * <p>With {@code append == false} the file is truncated on open, so a BOM is
 * always written. With {@code append == true} a BOM is only written when the
 * file did not exist or was empty, so existing content never gets a second BOM.
 *
 * @param file   path of the file to write
 * @param data   text to write; {@code null} writes nothing (apart from the BOM)
 * @param append {@code true} to append to existing content, {@code false} to overwrite
 * @throws IOException if the file cannot be opened or written, or if flushing
 *                     on close fails
 */
public static void saveFile(String file, String data, boolean append) throws IOException {
    File f = new File(file);

    // try-with-resources guarantees the stream is released even if writing the
    // BOM fails, and lets a failed flush-on-close surface instead of being swallowed.
    try (FileOutputStream fos = new FileOutputStream(f, append);
         BufferedWriter bw = new BufferedWriter(new OutputStreamWriter(fos, "UTF-8"))) {
        // write UTF8 BOM mark if file is empty
        if (f.length() < 1) {
            final byte[] bom = new byte[] { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF };
            // Written straight to the stream; nothing has been buffered in bw yet,
            // so the BOM is guaranteed to precede the text.
            fos.write(bom);
        }

        if (data != null) {
            bw.write(data);
        }
    }
}




实际应用:
package com.dayo.gerber;

import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.OutputStreamWriter;
import java.io.Reader;
import java.util.Properties;

/**
 * Generates a QTP script from a template file by replacing {@code {#...}}
 * placeholders with values taken from the gerber configuration and from an
 * optional set of "big board" properties. The result is written as UTF-8
 * with a leading BOM.
 *
 * @author 刘飞(liufei)
 */
public class Generate4YYQTPScript {

    private static final String ENCODING = "UTF-8";
    /** U+FEFF; encodes to the bytes EF BB BF in UTF-8. */
    private static final char BOM = '\uFEFF';
    private static final String GERBER_CONFIG = "config/gerber4yy.properties";
    /** The dialog title placeholder value is cut to at most this many characters. */
    private static final int DIALOG_TITLE_MAX_LENGTH = 17;

    // Shared by all instances and (re)loaded by every constructor call, as in the
    // original design.
    private static Properties gerberConfigProps = null;
    private static final String GERBER_FORMAT_DIALOG_TITLE_SCRIPT = "{#GERBER_FORMAT_DIALOG_TITLE}";
    private static String gerberFormatDialogTitle = "";

    /* gerber properties parameter keys and their script placeholders */
    private static final String QTP_SCRIPT_IN = "script.in";

    private static final String QTP_SCRIPT_OUT = "script.out";

    private static final String QTP_SYSTEM_PATH = "QTP.system.path";
    private static final String QTP_SYSTEM_PATH_SCRIPT = "{#QTPSYSTEMPATH}";

    private static final String GERBER_FILE_DRIVER_PATH = "gerber.file.driver.path";
    private static final String GERBER_FILE_DRIVER_PATH_SCRIPT = "{#driver}";

    private static final String GERBER_FILE_DRIVER = "gerber.file.driver";
    private static final String GERBER_FILE_DRIVER_SCRIPT = "{#dr}";

    private static final String GERBER_FILE_DIR = "gerber.file.dir";
    private static final String GERBER_FILE_DIR_SCRIPT = "{#dirName}";

    private static final String GERBER_FILE = "gerber.file";
    private static final String GERBER_FILE_SCRIPT = "{#fileName}";

    private static final String GERBER_OUT = "gerber.out";
    private static final String GERBER_OUT_SCRIPT = "{#gerberout}";

    private static final String VB_EXE_PATH = "vb.exe.path";

    /* bigBoard props: each placeholder doubles as the property key */
    private static final String LEAGUE_BOARD_NUM_SCRIPT = "{#LEAGUE_BOARD_NUM}";
    private static final String WIDTH_SCRIPT = "{#WIDTH}";
    private static final String P_SCRIPT = "{#P}";
    private static final String DY_SCRIPT = "{#DY}";

    private final Properties bigBoardProps;

    /**
     * Loads the gerber configuration and derives the dialog title placeholder
     * value ({@code DRIVER\DIR\FILE}, upper-cased, at most 17 characters).
     *
     * @param bigboard_props values for the big-board placeholders, keyed by the
     *                       placeholder text itself; may be {@code null}, in which
     *                       case those placeholders are left untouched
     */
    public Generate4YYQTPScript(Properties bigboard_props) {
        super();
        this.bigBoardProps = bigboard_props;

        try {
            gerberConfigProps = ConfigHelper.getConfigProperties(GERBER_CONFIG);
            String title = upperTrimmed(GERBER_FILE_DRIVER)
                    + "\\" + upperTrimmed(GERBER_FILE_DIR)
                    + "\\" + upperTrimmed(GERBER_FILE);
            // Clamp the cut-off so titles shorter than the limit no longer throw
            // StringIndexOutOfBoundsException.
            gerberFormatDialogTitle =
                    title.substring(0, Math.min(DIALOG_TITLE_MAX_LENGTH, title.length()));
        } catch (IOException e) {
            // Kept as best-effort, as in the original: the failure is reported but
            // construction continues; the configuration stays unset in that case.
            e.printStackTrace();
        }
    }

    public static void main(String[] args) throws IOException {
        Properties bigboard_props = new Properties();
        bigboard_props.setProperty(LEAGUE_BOARD_NUM_SCRIPT, String.valueOf(4));
        bigboard_props.setProperty(WIDTH_SCRIPT, String.valueOf(Double.parseDouble("54")));
        bigboard_props.setProperty(P_SCRIPT, String.valueOf(Double.parseDouble("2")));
        bigboard_props.setProperty(DY_SCRIPT, String.valueOf(Double.parseDouble("0.00")));

        Generate4YYQTPScript generateQTPScript = new Generate4YYQTPScript(bigboard_props);
        generateQTPScript.generateQTPScript();
        // RuntimeUtil.getInstance().run(generateQTPScript.getVBEXE(), 1, 50000);
    }

    /**
     * @return the configured gerber driver and directory joined with {@code /}
     */
    public String getCheckOutFilePath() {
        return gerberConfigProps.getProperty(GERBER_FILE_DRIVER).trim() + "/"
                + gerberConfigProps.getProperty(GERBER_FILE_DIR).trim();
    }

    /**
     * @return the value of the {@code gerber.out} configuration property
     */
    public String getSavePath() {
        return gerberConfigProps.getProperty(GERBER_OUT);
    }

    /**
     * @return the value of the {@code vb.exe.path} configuration property
     */
    public String getVBEXE() {
        return gerberConfigProps.getProperty(VB_EXE_PATH);
    }

    /**
     * Generates the QTP script using the {@code script.in} and
     * {@code script.out} paths from the configuration.
     *
     * @return the generated script file
     * @throws IOException if the template cannot be read or the script cannot be written
     */
    public File generateQTPScript() throws IOException {
        return generateQTPScript(gerberConfigProps.getProperty(QTP_SCRIPT_OUT),
                gerberConfigProps.getProperty(QTP_SCRIPT_IN));
    }

    /**
     * Generates the QTP script.
     *
     * @param target target file
     * @param source source (template) file
     * @return the generated script file
     * @throws IOException if the template cannot be read or the script cannot be written
     */
    public File generateQTPScript(File target, File source) throws IOException {
        return generateQTPScript(target.getAbsolutePath(), source.getAbsolutePath());
    }

    /**
     * Generates the QTP script: reads the template, substitutes the
     * placeholders and writes the result as UTF-8 with a BOM.
     *
     * @param target target file path; missing parent directories are created
     * @param source source (template) file path; an empty string yields an empty script
     * @return the generated script file
     * @throws IOException if the template cannot be read or the script cannot be written
     */
    public File generateQTPScript(String target, String source) throws IOException {
        // Read and convert first, so an unreadable template does not leave a
        // truncated target behind.
        String script = scriptConvey(readTemplate(source));

        File targetFile = new File(target);
        if (!targetFile.exists()) {
            File parent = targetFile.getParentFile();
            // A bare file name has no parent; FileOutputStream creates the file itself.
            if (parent != null) {
                parent.mkdirs();
            }
        }

        try (FileOutputStream fos = new FileOutputStream(targetFile);
             BufferedWriter writer = new BufferedWriter(new OutputStreamWriter(fos, ENCODING))) {
            writer.write(BOM);
            writer.write(script);
        }
        return targetFile;
    }

    /**
     * Substitutes every known placeholder in the template text.
     *
     * <p>Replacements are applied one after another in a fixed order; a
     * placeholder whose value is missing ({@code null}) is left in place.
     *
     * @param source template text
     * @return the text with placeholders replaced
     */
    private String scriptConvey(String source) {
        String script = source;
        script = replace(script, GERBER_FORMAT_DIALOG_TITLE_SCRIPT, gerberFormatDialogTitle);
        script = replace(script, GERBER_FILE_SCRIPT, gerberConfigProps.getProperty(GERBER_FILE));
        script = replace(script, GERBER_FILE_DRIVER_SCRIPT,
                gerberConfigProps.getProperty(GERBER_FILE_DRIVER));
        script = replace(script, GERBER_OUT_SCRIPT, gerberConfigProps.getProperty(GERBER_OUT));
        script = replace(script, GERBER_FILE_DIR_SCRIPT,
                gerberConfigProps.getProperty(GERBER_FILE_DIR));
        script = replace(script, GERBER_FILE_DRIVER_PATH_SCRIPT,
                gerberConfigProps.getProperty(GERBER_FILE_DRIVER_PATH));
        script = replace(script, QTP_SYSTEM_PATH_SCRIPT,
                gerberConfigProps.getProperty(QTP_SYSTEM_PATH));

        if (bigBoardProps != null) {
            script = replace(script, DY_SCRIPT, bigBoardProps.getProperty(DY_SCRIPT));
            script = replace(script, WIDTH_SCRIPT, bigBoardProps.getProperty(WIDTH_SCRIPT));
            script = replace(script, LEAGUE_BOARD_NUM_SCRIPT,
                    bigBoardProps.getProperty(LEAGUE_BOARD_NUM_SCRIPT));
            script = replace(script, P_SCRIPT, bigBoardProps.getProperty(P_SCRIPT));
        }

        return script;
    }

    /**
     * Reads the template file as UTF-8 text, dropping a leading BOM if present.
     *
     * <p>Every line is terminated with {@code \n} in the result, whatever line
     * separator the file uses.
     *
     * @param source template file path; an empty string yields an empty result
     * @return the template text
     * @throws IOException if the file cannot be opened or read
     * @throws NullPointerException if {@code source} is {@code null}
     */
    private String readTemplate(String source) throws IOException {
        if (source == null) {
            throw new NullPointerException(
                    "source script path is null (is '" + QTP_SCRIPT_IN + "' configured?)");
        }
        // Compare by content; the original "source == \"\"" only matched the
        // interned literal.
        if (source.isEmpty()) {
            return "";
        }

        StringBuilder content = new StringBuilder();
        try (FileInputStream in = new FileInputStream(new File(source));
             BufferedReader reader = new BufferedReader(new InputStreamReader(in, ENCODING))) {
            String line;
            while ((line = reader.readLine()) != null) {
                content.append(line).append('\n');
            }
        }

        // Java's UTF-8 decoder keeps the BOM as U+FEFF; remove it so the output
        // does not end up with two BOMs.
        if (content.length() > 0 && content.charAt(0) == BOM) {
            content.deleteCharAt(0);
        }
        return content.toString();
    }

    /**
     * Returns a configuration value trimmed and upper-cased.
     *
     * @param key configuration key; must be present in the configuration
     * @return the trimmed, upper-cased value
     */
    private static String upperTrimmed(String key) {
        return gerberConfigProps.getProperty(key).trim().toUpperCase();
    }

    /**
     * Replaces all occurrences of a substring within a string with another
     * string.
     *
     * @param inString   string to examine
     * @param oldPattern literal text to replace
     * @param newPattern text to insert
     * @return the string with the replacements applied, or {@code inString}
     *         unchanged when it or {@code oldPattern} is null/empty or
     *         {@code newPattern} is {@code null}
     */
    private static String replace(String inString, String oldPattern, String newPattern) {
        if (inString == null || inString.isEmpty()
                || oldPattern == null || oldPattern.isEmpty()
                || newPattern == null) {
            return inString;
        }
        // String.replace does a literal (non-regex) replace-all.
        return inString.replace(oldPattern, newPattern);
    }
}
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值