按字节分割字符串
后台数据库有时无法使用CLOB存储数据,这时需要把前台传递的字符串分割成若干更小的字符串,分别存到多个字段中。如果直接使用substring按字符数分割,当字符串中存在中文时,一个字符会占用多个字节,而数据库字符类型是按字节数限制长度的(以oracle的varchar2为例,长度最长为4000字节),因此会造成字段长度不够的异常。这时需要以字节为单位分割字符串。
普通回退法,代码如下:
/**
 * Splits a string into consecutive pieces whose UTF-8 encoding is at most
 * {@code byteLimit} bytes each.
 *
 * <p>Pieces are built greedily: each one is the longest prefix of the remaining
 * text that still fits. Concatenating the returned pieces in order yields
 * {@code srcStr} again. A surrogate pair (a character outside the Basic
 * Multilingual Plane) is never cut in half.
 *
 * <p>The byte count is always computed for UTF-8, independent of the platform
 * default charset.
 *
 * <p>An empty {@code srcStr} yields a list holding one empty string.
 *
 * @param srcStr    the text to split; must not be {@code null}
 * @param byteLimit maximum UTF-8 byte length of each piece; must be at least 3
 * @return the pieces, in source order
 * @throws IllegalArgumentException if {@code srcStr} is {@code null}, if
 *         {@code byteLimit} is below 3, or if a single character needs more
 *         than {@code byteLimit} bytes (a 4-byte character with a limit of 3)
 */
public static List<String> splitStringToListByByte(String srcStr, int byteLimit) {
    if (srcStr == null || byteLimit < 3) {
        throw new IllegalArgumentException(String.format("error input param, source string %s, byte limit %s", srcStr, byteLimit));
    }
    List<String> strList = new ArrayList<>();
    int strLength = srcStr.length();
    if (strLength == 0) {
        // An empty source has always produced a single empty piece; keep that for callers.
        strList.add(srcStr);
        return strList;
    }

    int startIndex = 0;   // first char of the piece being built
    int pieceBytes = 0;   // UTF-8 size of srcStr[startIndex, index)
    int index = 0;
    while (index < strLength) {
        // Walk by code point so a surrogate pair is always kept together.
        int codePoint = srcStr.codePointAt(index);
        int width = utf8ByteCount(codePoint);
        if (width > byteLimit) {
            throw new IllegalArgumentException(String.format(
                    "byte limit %s is too small for the %s-byte character at index %s", byteLimit, width, index));
        }
        // Written as a subtraction so the comparison cannot overflow for huge limits.
        if (width > byteLimit - pieceBytes) {
            // The next character does not fit: close the current piece and start a new one.
            strList.add(srcStr.substring(startIndex, index));
            startIndex = index;
            pieceBytes = 0;
        }
        pieceBytes += width;
        index += Character.charCount(codePoint);
    }
    // The loop always leaves at least one character in the last, still open piece.
    strList.add(srcStr.substring(startIndex));
    return strList;
}

/**
 * Returns how many bytes {@code String.getBytes} with the UTF-8 charset emits for one code point.
 * An unpaired surrogate counts as one byte because the encoder replaces it with {@code '?'}.
 */
private static int utf8ByteCount(int codePoint) {
    if (codePoint < 0x80) {
        return 1;
    }
    if (codePoint < 0x800) {
        return 2;
    }
    if (codePoint >= Character.MIN_SURROGATE && codePoint <= Character.MAX_SURROGATE) {
        return 1;
    }
    if (codePoint < 0x10000) {
        return 3;
    }
    return 4;
}
二分法方式,代码参考如下。
/**
 * Splits a string into consecutive pieces whose UTF-8 encoding is at most
 * {@code byteLimit} bytes each, locating every cut point with a binary search.
 *
 * <p>Each piece is the longest prefix of the remaining text that fits the
 * limit, except that a cut is moved back one char when it would separate a
 * surrogate pair. Concatenating the returned pieces in order yields
 * {@code srcStr} again. An empty {@code srcStr} yields an empty list.
 *
 * <p>The search keeps an explicit lower and upper bound, so it terminates for
 * every input.
 *
 * @param srcStr    the text to split; must not be {@code null}
 * @param byteLimit maximum UTF-8 byte length of each piece; must be at least 2
 * @return the pieces, in source order
 * @throws IllegalArgumentException if {@code srcStr} is {@code null}, if
 *         {@code byteLimit} is below 2, or if a single character needs more
 *         than {@code byteLimit} bytes
 */
public static List<String> splitStringToListWithByteLimitBinary(String srcStr, int byteLimit) {
    if (srcStr == null || byteLimit < 2) {
        throw new IllegalArgumentException(String.format("error input param, source string %s, byte limit %s", srcStr, byteLimit));
    }
    List<String> result = new ArrayList<>();
    final int length = srcStr.length();
    int startIndex = 0;
    while (startIndex < length) {
        // Invariant: srcStr[startIndex, low) fits the limit; nothing longer than high can fit.
        int low = startIndex;
        // Every char encodes to at least one byte, so a piece never holds more than byteLimit chars.
        int high = (int) Math.min((long) startIndex + byteLimit, (long) length);
        while (low < high) {
            // Upper midpoint, so the range shrinks even when high == low + 1.
            int mid = low + ((high - low + 1) >>> 1);
            int byteLength = srcStr.substring(startIndex, mid).getBytes(StandardCharsets.UTF_8).length;
            if (byteLength <= byteLimit) {
                low = mid;
            } else {
                high = mid - 1;
            }
        }

        int endIndex = low;
        // A lone high surrogate encodes as a single replacement byte, so the search may
        // stop in the middle of a pair; step back so both halves move to the next piece.
        if (endIndex > startIndex && endIndex < length
                && Character.isHighSurrogate(srcStr.charAt(endIndex - 1))
                && Character.isLowSurrogate(srcStr.charAt(endIndex))) {
            endIndex--;
        }
        if (endIndex == startIndex) {
            // Not even one character fits; fail loudly instead of looping or dropping text.
            throw new IllegalArgumentException(String.format(
                    "byte limit %s is too small for the character at index %s", byteLimit, startIndex));
        }
        result.add(srcStr.substring(startIndex, endIndex));
        startIndex = endIndex;
    }
    return result;
}