import org.apache.commons.io.Charsets; // import the package/class this method depends on
/**
 * Creates a ReversedLinesFileReader with the given block size and encoding.
 * <p>
 * The encoding is validated before the file is opened, and the file handle is
 * closed again if the remaining initialisation fails, so a failed construction
 * does not leave an open file behind.
 *
 * @param file
 *            the file to be read
 * @param blockSize
 *            size of the internal buffer (for ideal performance this should
 *            match with the block size of the underlying file system); must
 *            be greater than zero
 * @param encoding
 *            the encoding of the file; the value is resolved through
 *            {@code Charsets.toCharset(Charset)} before it is used
 * @throws IOException if an I/O error occurs
 * @throws UnsupportedEncodingException if the encoding is a multi-byte
 *             encoding other than UTF-8, Shift_JIS, UTF-16BE or UTF-16LE
 * @throws IllegalArgumentException if {@code blockSize} is not positive
 * @since 2.3
 */
public ReversedLinesFileReader(final File file, final int blockSize, final Charset encoding) throws IOException {
    // A non-positive block size would make the block arithmetic below meaningless
    // (division by zero for 0, negative block counts otherwise).
    if (blockSize <= 0) {
        throw new IllegalArgumentException("blockSize must be greater than zero: " + blockSize);
    }
    this.blockSize = blockSize;

    // --- check & prepare encoding ---
    // Done BEFORE the file is opened so that an unsupported encoding cannot leak a file handle.
    final Charset charset = Charsets.toCharset(encoding);
    // Store the resolved charset rather than the raw argument so later uses never see null.
    this.encoding = charset;
    final CharsetEncoder charsetEncoder = charset.newEncoder();
    final float maxBytesPerChar = charsetEncoder.maxBytesPerChar();
    // Charsets are compared with equals(): identity comparison would depend on instance caching.
    if (maxBytesPerChar == 1f) {
        // all one byte encodings are no problem
        byteDecrement = 1;
    } else if (charset.equals(Charset.forName("UTF-8"))) {
        // UTF-8 works fine out of the box, for multibyte sequences a second UTF-8 byte can never be a newline byte
        // http://en.wikipedia.org/wiki/UTF-8
        byteDecrement = 1;
    } else if (charset.equals(Charset.forName("Shift_JIS"))) {
        // Same as for UTF-8
        // http://www.herongyang.com/Unicode/JIS-Shift-JIS-Encoding.html
        byteDecrement = 1;
    } else if (charset.equals(Charset.forName("UTF-16BE")) || charset.equals(Charset.forName("UTF-16LE"))) {
        // UTF-16 new line sequences are not allowed as second tuple of four byte sequences,
        // however byte order has to be specified
        byteDecrement = 2;
    } else if (charset.equals(Charset.forName("UTF-16"))) {
        throw new UnsupportedEncodingException(
                "For UTF-16, you need to specify the byte order (use UTF-16BE or UTF-16LE)");
    } else {
        throw new UnsupportedEncodingException(
                "Encoding " + charset + " is not supported yet (feel free to submit a patch)");
    }
    // NOTE: The new line sequences are matched in the order given, so it is important that \r\n is BEFORE \n
    newLineSequences = new byte[][] { "\r\n".getBytes(charset), "\n".getBytes(charset), "\r".getBytes(charset) };
    avoidNewlineSplitBufferSize = newLineSequences[0].length;

    // --- open the file and position on its last block ---
    randomAccessFile = new RandomAccessFile(file, "r");
    boolean initialized = false;
    try {
        totalByteLength = randomAccessFile.length();
        int lastBlockLength = (int) (totalByteLength % blockSize);
        if (lastBlockLength > 0) {
            // the trailing partial block counts as a block of its own
            totalBlockCount = totalByteLength / blockSize + 1;
        } else {
            totalBlockCount = totalByteLength / blockSize;
            if (totalByteLength > 0) {
                // the file length is an exact multiple of blockSize: the last block is a full one
                lastBlockLength = blockSize;
            }
        }
        currentFilePart = new FilePart(totalBlockCount, lastBlockLength, null);
        initialized = true;
    } finally {
        if (!initialized) {
            // Construction failed after the file was opened: release the handle.
            try {
                randomAccessFile.close();
            } catch (final IOException ignored) {
                // deliberately ignored so the original failure is the one that propagates
            }
        }
    }
}