如果按固定长度的缓冲区读取文件,恰好把一个中文字符(多字节编码)从中间截断,就会造成读取中文乱码,解决方法如下:
方式一:采用 clear。只保存已正确解码的字符,并把通道的读取位置回退到被截断字符的起始字节,下一次读取时重新读取该字符,代码如下:
/**
 * Reads a whole UTF-8 file into a {@link StringBuilder} using a fixed-size byte buffer.
 *
 * <p>A multi-byte character may be cut in half at the end of a chunk. This variant keeps only
 * the characters that decoded completely, moves the channel position back by the number of
 * undecoded bytes, and clears the buffer so the split character is read again in full.
 *
 * @param filePath path of the file to read
 * @return the decoded file content
 * @throws java.nio.charset.CharacterCodingException if the file contains bytes that are not
 *     valid UTF-8, including a multi-byte sequence truncated at the end of the file
 * @throws IOException if the file cannot be opened or read
 */
private static StringBuilder readFromFile(String filePath) throws IOException {
    final int bufferSize = 1024;
    // try-with-resources closes the channel even when reading or decoding fails.
    try (FileChannel fileChannel = FileChannel.open(Paths.get(filePath), StandardOpenOption.READ)) {
        ByteBuffer byteBuffer = ByteBuffer.allocate(bufferSize);
        // UTF-8 never produces more chars than input bytes, so an equally sized
        // char buffer cannot overflow.
        CharBuffer charBuffer = CharBuffer.allocate(bufferSize);
        // Decode through an explicit charset decoder so that an incomplete trailing
        // sequence is left in the byte buffer instead of being turned into garbage.
        Charset charset = Charset.forName("UTF-8");
        CharsetDecoder decoder = charset.newDecoder();
        // Clamp the initial capacity: a plain (int) cast goes negative for files >= 2 GiB.
        StringBuilder sb = new StringBuilder((int) Math.min(fileChannel.size(), Integer.MAX_VALUE - 8));
        while (-1 != fileChannel.read(byteBuffer)) {
            byteBuffer.flip();
            // Only when the channel has reached the end of the file may the decoder treat
            // an incomplete sequence as an error rather than as "more input needed".
            boolean endOfInput = fileChannel.position() >= fileChannel.size();
            java.nio.charset.CoderResult result = decoder.decode(byteBuffer, charBuffer, endOfInput);
            if (result.isError()) {
                // Without this check the same undecodable bytes would be re-read forever.
                result.throwException();
            }
            charBuffer.flip();
            sb.append(charBuffer);
            charBuffer.clear();
            if (byteBuffer.hasRemaining()) {
                // Rewind the channel to the first byte of the split character.
                fileChannel.position(fileChannel.position() - byteBuffer.remaining());
            }
            byteBuffer.clear();
        }
        return sb;
    }
}
方式二:采用 compact。把被截断字符的剩余字节保留在缓冲区开头,下一次读取时接着填充并继续解码,代码如下:
/**
 * Reads a whole UTF-8 file into a {@link StringBuilder} using a small fixed-size byte buffer.
 *
 * <p>A multi-byte character may be cut in half at the end of a chunk. This variant calls
 * {@link ByteBuffer#compact()} after each decode step: the undecoded bytes of the split
 * character are moved to the front of the buffer and the next read appends the rest of the
 * character behind them.
 *
 * @param filePath path of the file to read
 * @return the decoded file content
 * @throws java.nio.charset.CharacterCodingException if the file contains bytes that are not
 *     valid UTF-8, including a multi-byte sequence truncated at the end of the file
 * @throws IOException if the file cannot be opened or read
 */
private static StringBuilder readFromFile(String filePath) throws IOException {
    final int size = 10;
    // try-with-resources closes the channel even when reading or decoding fails.
    try (FileChannel fileChannel = FileChannel.open(Paths.get(filePath), StandardOpenOption.READ)) {
        ByteBuffer byteBuffer = ByteBuffer.allocate(size);
        // UTF-8 never produces more chars than input bytes, so an equally sized
        // char buffer cannot overflow.
        CharBuffer charBuffer = CharBuffer.allocate(size);
        // Decode through an explicit charset decoder so that an incomplete trailing
        // sequence is left in the byte buffer instead of being turned into garbage.
        Charset charset = Charset.forName("UTF-8");
        CharsetDecoder decoder = charset.newDecoder();
        // Clamp the initial capacity: a plain (int) cast goes negative for files >= 2 GiB.
        StringBuilder sb = new StringBuilder((int) Math.min(fileChannel.size(), Integer.MAX_VALUE - 8));
        boolean endOfInput = false;
        while (!endOfInput) {
            // The channel itself reports the end of the file; the final pass still runs so
            // the decoder can validate whatever bytes compact() carried over.
            endOfInput = fileChannel.read(byteBuffer) == -1;
            byteBuffer.flip();
            java.nio.charset.CoderResult result = decoder.decode(byteBuffer, charBuffer, endOfInput);
            if (result.isError()) {
                // Without this check an undecodable byte stays at the front of the buffer,
                // the buffer fills up, read() returns 0 and the loop never ends.
                result.throwException();
            }
            charBuffer.flip();
            sb.append(charBuffer);
            charBuffer.clear();
            // Keep the bytes of a split character and make room behind them. This replaces
            // rewinding the channel position and clearing the buffer.
            byteBuffer.compact();
        }
        // Complete the decoding operation; the decoder is in its end-of-input state here.
        decoder.flush(charBuffer);
        charBuffer.flip();
        sb.append(charBuffer);
        return sb;
    }
}