截取字符串时如果可能截到汉字,就不能截出乱码,也就是不能把一个汉字只截取一半。例如字符串"我ABC汉字d"(按GBK编码,每个汉字占2个字节):截取5个字节时,结果应该是"我ABC";截取8个字节时,结果应该是"我ABC汉",而不应该是"我ABC汉?"(其中"?"为半个汉字)。这种舍弃不完整汉字的做法可理解为向前截取。
备注:将字符编码由GBK改为UTF-8后,每个中文字符的长度按3个字节计算
以下方法是向前截取字符串(按GBK字节数截取,放不下的汉字整个舍弃)
以下代码通过逐个减少字符数来避免半个汉字问题
/**
 * Returns the longest prefix of {@code str} whose GBK encoding occupies at
 * most {@code subSLength} bytes, without ever cutting a character in half.
 *
 * <p>For example, with {@code "我ABC汉字d"} a limit of 5 bytes yields
 * {@code "我ABC"} and a limit of 8 bytes yields {@code "我ABC汉"}: the
 * character that does not fit completely is dropped.
 *
 * @param str        the text to truncate; {@code null} is treated as empty
 * @param subSLength the maximum number of GBK bytes to keep; zero or a
 *                   negative value yields the empty string
 * @return the truncated prefix, never {@code null}
 * @throws UnsupportedEncodingException if the runtime has no GBK charset
 */
public static String subStr(String str, int subSLength)
        throws UnsupportedEncodingException {
    if (str == null || subSLength <= 0) {
        return "";
    }
    int usedBytes = 0;
    int endIndex = 0;
    while (endIndex < str.length()) {
        // Advance by whole code points so a surrogate pair is never split.
        int charCount = Character.charCount(str.codePointAt(endIndex));
        // GBK is stateless, so per-character byte lengths add up to the
        // byte length of the whole prefix; no need to re-encode the prefix.
        int encodedLength = str.substring(endIndex, endIndex + charCount).getBytes("GBK").length;
        if (usedBytes + encodedLength > subSLength) {
            break;
        }
        usedBytes += encodedLength;
        endIndex += charCount;
    }
    return str.substring(0, endIndex);
}
备注:将字符编码由GBK改为UTF-8后,每个中文字符的长度按3个字节计算
以下方法是向后截取字符串
/**
 * Extracts a slice of the UTF-8 encoding of {@code str}, extending the
 * slice forward when it would otherwise end in the middle of a multi-byte
 * character, so the result never ends with half a character.
 *
 * <p>Despite its name, {@code end} is a byte <em>count</em>, not an end
 * index: the slice covers {@code end} bytes starting at byte offset
 * {@code start}, plus however many continuation bytes are needed to
 * complete the last character. A slice that already ends on a character
 * boundary (for example on an ASCII letter) is returned unchanged.
 *
 * <p>{@code start} is not adjusted; if it points into the middle of a
 * character, the leading partial bytes decode to replacement characters.
 *
 * @param str   the text to slice; may be {@code null}
 * @param start byte offset into the UTF-8 encoding of {@code str}
 * @param end   minimum number of bytes to take starting at {@code start}
 * @return the decoded slice, or {@code null} when {@code str} is {@code null}
 * @throws StringIndexOutOfBoundsException if the requested byte range lies
 *         outside the encoded text
 * @throws UnsupportedEncodingException declared for compatibility; UTF-8
 *         is always available
 */
public static String subStr_1(String str, int start, int end)
        throws UnsupportedEncodingException {
    if (str == null) {
        return null;
    }
    byte[] utf8 = str.getBytes("UTF-8");
    if (start < 0 || end < 0 || start > utf8.length - end) {
        throw new StringIndexOutOfBoundsException(
                "start=" + start + ", length=" + end + ", byteLength=" + utf8.length);
    }
    int stop = start + end;
    // UTF-8 continuation bytes look like 10xxxxxx. While the byte just past
    // the slice is one, the last character is still incomplete.
    while (stop < utf8.length && (utf8[stop] & 0xC0) == 0x80) {
        stop++;
    }
    // Decode with the same charset that produced the bytes.
    return new String(utf8, start, stop - start, "UTF-8");
}
/**
 * Decodes a raw slice of the UTF-8 encoding of {@code str}.
 *
 * <p>No character-boundary adjustment is made: if the slice starts or ends
 * in the middle of a multi-byte character, the partial bytes decode to
 * replacement characters (the "half Chinese character" problem).
 *
 * @param str   the text to slice; must not be {@code null}
 * @param start byte offset into the UTF-8 encoding of {@code str}
 * @param end   number of bytes to decode starting at {@code start}
 *              (a length, despite the name)
 * @return the decoded slice
 * @throws UnsupportedEncodingException declared for compatibility; UTF-8
 *         is always available
 */
public static String getByteStr(String str, int start, int end) throws UnsupportedEncodingException {
    byte[] utf8 = str.getBytes("UTF-8");
    // Decode with UTF-8 explicitly; the platform default charset may differ
    // from the charset used to produce the bytes.
    return new String(utf8, start, end, "UTF-8");
}
以下代码会出现半个汉字问题
/**
* ReadFileByteBuffer.java
* cn.com.songjy.test.io
* Function: TODO
*
* version date author
* ──────────────────────────────────
* 1.0 2013-8-31 songjy
*
* Copyright (c) 2013, TNT All Rights Reserved.
*/
package cn.com.songjy.test.io;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.nio.ByteBuffer;
import java.nio.CharBuffer;
import java.nio.channels.FileChannel;
import java.nio.charset.Charset;
import java.nio.charset.CharsetDecoder;
import java.nio.charset.CoderResult;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
/**
 * ClassName:ReadFileByteBuffer
 *
 * <p>Reads {@code pom.xml} through a {@link FileChannel} in 64-byte chunks,
 * decodes the bytes as UTF-8 and logs each decoded chunk.
 *
 * <p>Decoding every chunk on its own with {@code decoder.decode(ByteBuffer)}
 * fails with {@code java.nio.charset.MalformedInputException} whenever a
 * multi-byte character straddles a chunk boundary (the "half Chinese
 * character" problem). This implementation uses the streaming form of
 * {@code decode} and carries the undecoded tail bytes over to the next read.
 *
 * @author songjy
 * @version 1.0
 * @since v1.0
 * @Date 2013-8-31 12:15:21 PM
 */
public class ReadFileByteBuffer {
    private static final Log log = LogFactory.getLog(ReadFileByteBuffer.class);

    /** Number of bytes requested from the channel per read. */
    private static final int CHUNK_SIZE = 64;

    public static void main(String[] args) {
        // One decoder for the whole file so it can track state across chunks.
        CharsetDecoder decoder = Charset.forName("UTF-8").newDecoder();
        ByteBuffer bb = ByteBuffer.allocate(CHUNK_SIZE);
        // A UTF-8 byte sequence never decodes to more chars than it has bytes,
        // so a char buffer of the same capacity cannot overflow.
        CharBuffer cb = CharBuffer.allocate(CHUNK_SIZE);

        // try-with-resources closes the channel and the stream on every path.
        try (FileInputStream fis = new FileInputStream("pom.xml");
                FileChannel fc = fis.getChannel()) {
            boolean endOfInput = false;
            while (!endOfInput) {
                endOfInput = (-1 == fc.read(bb));
                bb.flip(); /* switch the buffer from filling to draining */

                // With endOfInput == false, an incomplete trailing character is
                // left in bb instead of being reported as malformed.
                CoderResult result = decoder.decode(bb, cb, endOfInput);
                if (result.isError()) {
                    result.throwException();
                }
                if (endOfInput) {
                    decoder.flush(cb);
                }

                cb.flip();
                if (cb.hasRemaining()) {
                    log.info(cb);
                }
                cb.clear();

                // Keep the undecoded tail (at most a partial character) at the
                // front of the buffer so the next read completes it.
                bb.compact();
            }
        } catch (IOException e) {
            // Also covers FileNotFoundException and CharacterCodingException,
            // both of which are IOExceptions; genuinely malformed input is
            // still reported here.
            log.error(e.getMessage(), e);
        }
    }
}
/* A character split across two 64-byte reads is now completed by the next read rather than raising java.nio.charset.MalformedInputException (the "half Chinese character" problem). */