现在需要将字符串按字节数截取,并且要避免中文字符被从中间截断而变成乱码。
之前看的别的哥们的写法:Java字符串按照字节数进行截取_java按字节长度截取字符串-CSDN博客
但他的方法不够实用,我稍微改良了一下:
/**
 * Splits a string into an array with one element per Unicode code point.
 *
 * <p>Splitting is done by code point rather than by UTF-16 code unit, so a
 * supplementary character (a surrogate pair, e.g. an emoji) stays together in
 * a single element instead of being cut into two unpaired surrogates.
 *
 * @param string the text to split; {@code null} is treated as empty
 * @return the characters of {@code string} in order, one per element;
 *         an empty array when {@code string} is {@code null} or empty
 */
public static String[] string2Array(String string)
{
    if (string == null) {
        return new String[0];
    }
    String[] temp = new String[string.codePointCount(0, string.length())];
    int start = 0;
    for (int i = 0; i < temp.length; i++) {
        // Advance by one whole code point (1 or 2 chars) so pairs are never split.
        int end = string.offsetByCodePoints(start, 1);
        temp[i] = string.substring(start, end);
        start = end;
    }
    return temp;
}
/**
 * Groups single-character strings into chunks of at most {@code n} "bytes".
 *
 * <p>Width model: an element that is exactly one character in the range
 * U+4E00..U+9FA5 counts as 2; every other element counts as 1. This is a
 * fixed 2/1 model, not the length in any particular charset (for example,
 * these characters occupy 3 bytes in UTF-8).
 *
 * <p>An element is never split across chunks. If adding the next element
 * would exceed {@code n}, the current chunk is closed first. An element that
 * is wider than {@code n} on its own (a 2-wide character with {@code n == 1})
 * is emitted as a chunk by itself so that no input is dropped.
 *
 * <p>A header line is printed to standard output on every call.
 *
 * @param str the characters to group, one per element (e.g. the result of
 *            {@code string2Array}); {@code null} yields an empty list
 * @param n   maximum width of each chunk; must be positive
 * @return the chunks in input order; never {@code null}
 * @throws IllegalArgumentException if {@code n} is not positive
 */
public static List<String> interception(String[] str, int n)
{
    if (n <= 0) {
        throw new IllegalArgumentException("n must be positive, got " + n);
    }
    List<String> list = new ArrayList<String>();
    System.out.println("以每" + n +"个字节划分的字符串如下所示:");
    if (str == null) {
        return list;
    }
    StringBuilder sb = new StringBuilder();
    int count = 0;
    for (int i = 0; i < str.length; i++) {
        int width = isChineseChar(str[i]) ? 2 : 1;
        // The element does not fit into the current chunk: close the chunk first.
        // The non-empty check avoids emitting an empty chunk when a single
        // element is wider than n.
        if (count + width > n && sb.length() > 0) {
            list.add(sb.toString());
            sb.setLength(0);
            count = 0;
        }
        sb.append(str[i]);
        count += width;
        // The chunk is full (or holds one over-wide element): close it now so
        // the next element starts from a clean counter.
        if (count >= n) {
            list.add(sb.toString());
            sb.setLength(0);
            count = 0;
        }
    }
    // Flush the trailing, partially filled chunk.
    if (sb.length() > 0) {
        list.add(sb.toString());
    }
    return list;
}

/** Returns true when {@code s} is exactly one character in U+4E00..U+9FA5. */
private static boolean isChineseChar(String s)
{
    if (s.length() != 1) {
        return false;
    }
    char c = s.charAt(0);
    return c >= 0x4E00 && c <= 0x9FA5;
}
效果如下:
/**
 * Demo driver: splits a sample string of six Chinese characters at several
 * chunk sizes and prints each resulting list followed by a blank line.
 *
 * @param args unused
 */
public static void main(String[] args) {
    final String sample = "啊呃哦吴奥啊";
    final int[] chunkSizes = {2, 3, 5, 7, 9, 11, 13, 15};
    for (int size : chunkSizes) {
        String[] chars = string2Array(sample);
        List<String> chunks = interception(chars, size);
        System.out.println(chunks + "\n");
    }
}
孩子,你无敌了