编写一个截取字符串的函数,输入为一个字符串和字节数,输出为按字节截取的字符串,但要保证汉字不被截取半个。例如(按 GBK 编码,每个汉字占 2 个字节):输入“我ABC”和 4,应该截取为“我AB”;输入“我ABC汉DEF”和 6,应该输出“我ABC”,而不是“我ABC”加上“汉”的半个。
首先要了解中文字符有多种编码及各种编码的特征。假设n为要截取的字节数。
/**
 * Demonstrates truncating a string to a byte budget without cutting a multi-byte
 * character in half, for GBK-encoded and UTF-8-encoded text.
 *
 * <p>The {@code trim*} helpers receive the encoded bytes of a string together with a byte
 * budget {@code n}, and return how many leading {@code char}s of the original string fit
 * completely inside the first {@code n} bytes. That value can be passed directly to
 * {@link String#substring(int, int)} as the end index.
 */
public class DismantleORsplit {

    /**
     * Runs the demo: prints the GBK bytes of the sample text, then the computed GBK
     * character count for a 3-byte budget, then the GBK and UTF-8 truncations.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        // Sample text: one CJK character, "ABC", another CJK character, "def".
        // Written with Unicode escapes so compilation does not depend on the source encoding.
        String str = "\u6211ABC\u6c49def";
        byte[] gbkBytes = str.getBytes(java.nio.charset.Charset.forName("GBK"));
        byte[] utf8Bytes = str.getBytes(java.nio.charset.StandardCharsets.UTF_8);

        for (byte b : gbkBytes) {
            System.out.print(b + " ");
        }

        int num = trimGBK(gbkBytes, 3);
        int num2 = trimUTF(utf8Bytes, 3);
        System.out.println(num);
        System.out.println(str.substring(0, num));
        System.out.println(str.substring(0, num2));
    }

    /**
     * Counts how many leading {@code char}s of a UTF-8 encoded string fit entirely within
     * the first {@code n} bytes.
     *
     * <p>Sequence widths are taken from the lead byte (1 to 4 bytes). A 4-byte sequence
     * encodes a supplementary code point, which occupies two {@code char}s (a surrogate
     * pair) in a Java string, so it advances the result by two. The input is expected to be
     * well-formed UTF-8; a stray continuation byte or invalid lead byte is counted as a
     * single one-byte character.
     *
     * @param buf UTF-8 encoded bytes; must not be {@code null}
     * @param n   byte budget; values larger than {@code buf.length} are clamped, and values
     *            less than or equal to zero yield {@code 0}
     * @return the number of {@code char}s that fit without splitting a character
     */
    public static int trimUTF(byte[] buf, int n) {
        int limit = Math.min(n, buf.length);
        int index = 0;
        int i = 0;
        while (i < limit) {
            int width = utf8SequenceLength(buf[i]);
            if (i + width > limit) {
                // The next character would be cut in half: stop before it.
                break;
            }
            index += (width == 4) ? 2 : 1;
            i += width;
        }
        return index;
    }

    /**
     * Counts how many leading characters of a GBK encoded string fit entirely within the
     * first {@code n} bytes.
     *
     * <p>A byte with the high bit set (negative as a Java {@code byte}) is treated as the
     * first half of a two-byte character; the byte after it is consumed as the second half
     * whatever its sign. Every other byte is a single-byte character.
     *
     * @param buf GBK encoded bytes; must not be {@code null}
     * @param n   byte budget; values larger than {@code buf.length} are clamped, and values
     *            less than or equal to zero yield {@code 0}
     * @return the number of characters that fit without splitting a two-byte character
     */
    public static int trimGBK(byte[] buf, int n) {
        int limit = Math.min(n, buf.length);
        int index = 0;
        int i = 0;
        while (i < limit) {
            int width = (buf[i] < 0) ? 2 : 1;
            if (i + width > limit) {
                // Only the first half of a two-byte character fits: drop it.
                break;
            }
            index++;
            i += width;
        }
        return index;
    }

    /** Returns the total byte length of the UTF-8 sequence introduced by {@code lead}. */
    private static int utf8SequenceLength(byte lead) {
        int b = lead & 0xFF;
        if ((b & 0xE0) == 0xC0) {
            return 2;
        }
        if ((b & 0xF0) == 0xE0) {
            return 3;
        }
        if ((b & 0xF8) == 0xF0) {
            return 4;
        }
        // ASCII, or a malformed byte that is treated as a one-byte unit.
        return 1;
    }
}