/*
 * 要求:
 * 编写一个截取字符串的函数,输入为一个字符串和字节数,输出为按字节截取的字符串。但是要保证汉字不被截半个:如输入“我ABC”和 4,应该截为“我AB”;输入“我ABC汉DEF”和 6,应该输出“我ABC”,而不是“我ABC”加上“汉”的半个。
 */
package IO加强;
import java.io.UnsupportedEncodingException;
import java.nio.charset.Charset;
import java.util.Properties;
/**
 * Truncates a string to at most a given number of encoded bytes without ever
 * splitting a multi-byte character in half.
 *
 * <p>Example (GBK, two bytes per Chinese character): cutting "我ABC" at 4 bytes
 * yields "我AB", and cutting "我ABC汉DEF" at 6 bytes yields "我ABC" rather than
 * "我ABC" followed by half of "汉".
 */
public class StringCut {

    /**
     * Demo driver: prints the default-charset bytes of a sample string, then the
     * result of cutting it at every byte length from 0 to the full encoded length.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        // In GBK a Chinese character usually takes two bytes and a letter one byte.
        // Some characters, e.g. "琲", encode as -84 105: a negative lead byte
        // followed by a positive trail byte.
        String str = "你好hj琲琲琲";

        // Encode with the platform default charset, the same charset cutByByte
        // dispatches on. To exercise one encoding explicitly, use getBytes("gbk") /
        // getBytes("utf-8") here and call cutByByteGbk / cutByByteUtf8 below.
        byte[] bt = str.getBytes();

        // Dump the signed byte values for inspection.
        for (byte b : bt) {
            System.out.print(b + " ");
        }
        System.out.println();

        System.out.println(str);
        System.out.println("-----------");
        // Try every cut position, including 0 and the full length.
        for (int i = 0; i <= bt.length; i++) {
            String s = cutByByte(str, i);
            System.out.println("截取第" + i + "个字节结果是: " + s);
        }
    }

    /**
     * Cuts {@code str} to at most {@code len} bytes of its GBK encoding, dropping a
     * trailing half character if the cut would split one.
     *
     * @param str the string to cut; must not be null
     * @param len maximum number of GBK bytes to keep; values larger than the
     *            encoded length keep the whole string
     * @return the longest prefix of the GBK round-tripped string that fits
     * @throws IllegalArgumentException if {@code len} is negative
     * @throws RuntimeException if the GBK charset is not available
     */
    private static String cutByByteGbk(String str, int len) {
        if (len < 0) {
            throw new IllegalArgumentException("len must not be negative: " + len);
        }
        try {
            byte[] bytes = str.getBytes("gbk");
            // Clamp so an oversized len keeps everything instead of indexing
            // past the end of the array.
            int end = Math.min(len, bytes.length);

            // Count the run of negative bytes that ends at the cut position.
            // A positive byte is either ASCII or the trail byte of a two-byte
            // character, so the run always starts on a character boundary and is
            // made of (lead, trail) pairs. An odd run therefore means the last
            // kept byte is a lead byte whose trail byte would be cut off.
            // Assumes every non-ASCII character encodes to exactly two bytes.
            int negativeRun = 0;
            for (int i = end - 1; i >= 0 && bytes[i] < 0; i--) {
                negativeRun++;
            }
            if (negativeRun % 2 != 0) {
                end--;
            }
            return new String(bytes, 0, end, "gbk");
        } catch (UnsupportedEncodingException e) {
            // Keep the cause so the failure can be diagnosed.
            throw new RuntimeException("gbk字符串解码异常", e);
        }
    }

    /**
     * Cuts {@code str} to at most {@code len} bytes of its UTF-8 encoding, dropping
     * a trailing partial sequence if the cut would split a character. Handles
     * sequences of any length (1 to 4 bytes), not only 3-byte ones.
     *
     * @param str the string to cut; must not be null
     * @param len maximum number of UTF-8 bytes to keep; values larger than the
     *            encoded length keep the whole string
     * @return the longest prefix of the UTF-8 round-tripped string that fits
     * @throws IllegalArgumentException if {@code len} is negative
     * @throws RuntimeException if the UTF-8 charset is not available
     */
    private static String cutByByteUtf8(String str, int len) {
        if (len < 0) {
            throw new IllegalArgumentException("len must not be negative: " + len);
        }
        try {
            byte[] bytes = str.getBytes("utf-8");
            int end = Math.min(len, bytes.length);

            // A cut position is a character boundary exactly when the first
            // excluded byte is not a continuation byte (bit pattern 10xxxxxx).
            // Step back until that holds; this works for 2-, 3- and 4-byte
            // sequences alike.
            while (end > 0 && end < bytes.length && (bytes[end] & 0xC0) == 0x80) {
                end--;
            }
            return new String(bytes, 0, end, "utf-8");
        } catch (UnsupportedEncodingException e) {
            // Keep the cause so the failure can be diagnosed.
            throw new RuntimeException("utf-8字符串解码异常", e);
        }
    }

    /**
     * Cuts {@code str} to at most {@code len} bytes as encoded in the platform
     * default charset (the charset used by {@link String#getBytes()}).
     *
     * <p>GBK and UTF-8 use their dedicated byte-level routines; any other default
     * charset is handled by measuring the string one code point at a time.
     *
     * @param str the string to cut; must not be null
     * @param len maximum number of encoded bytes to keep
     * @return the longest prefix that fits without splitting a character
     * @throws IllegalArgumentException if {@code len} is negative
     */
    private static String cutByByte(String str, int len) {
        // Ask the runtime for the charset getBytes() really uses. The
        // "file.encoding" property may be absent or use an alias such as "UTF8".
        Charset charset = Charset.defaultCharset();
        String name = charset.name();
        if (name.equalsIgnoreCase("gbk")) {
            return cutByByteGbk(str, len);
        }
        if (name.equalsIgnoreCase("utf-8")) {
            return cutByByteUtf8(str, len);
        }
        return cutByEncodedLength(str, len, charset);
    }

    /**
     * Charset-agnostic fallback: keeps whole code points while their combined
     * encoded size stays within {@code len}.
     *
     * <p>Each code point is encoded on its own, so a charset that emits a
     * byte-order mark or shift sequences on every encode call is over-counted.
     */
    private static String cutByEncodedLength(String str, int len, Charset charset) {
        if (len < 0) {
            throw new IllegalArgumentException("len must not be negative: " + len);
        }
        int used = 0;
        int index = 0;
        while (index < str.length()) {
            // Advance by code point so surrogate pairs are never separated.
            int next = str.offsetByCodePoints(index, 1);
            int size = str.substring(index, next).getBytes(charset).length;
            if (size > len - used) {
                break;
            }
            used += size;
            index = next;
        }
        return str.substring(0, index);
    }
}