字符串、汉字的拆分问题的解法
import java.io.*;
import java.util.ArrayList;
import java.util.Iterator;
import java.util.List;
/**
 * Demonstrates two ways of cutting a string that mixes ASCII and Chinese
 * characters into pieces of at most {@code mod} encoded bytes.
 *
 * <ul>
 *   <li>{@link #split1(String, int)} cuts the GB2312 byte stream blindly, so a
 *       two-byte character that straddles a boundary is torn in half and shows
 *       up as garbage in both neighbouring pieces.</li>
 *   <li>{@link #split2(String, int)} works on the GBK byte stream and never
 *       tears a character: a two-byte character that does not fit into the
 *       space left in the current piece is dropped and reported.</li>
 * </ul>
 *
 * Both methods print their result to {@code System.out}.
 */
public class GoodSplitString
{
    /**
     * Prints the original string and then runs both splitting strategies on it.
     *
     * @param s   the string to split; must not be {@code null}
     * @param mod maximum number of encoded bytes per piece; must be positive
     * @throws IllegalArgumentException if {@code mod} is not positive
     */
    public GoodSplitString(String s, int mod)
    {
        requirePositiveChunkSize(mod);
        System.out.println("原始字符串为: " + s);
        split1(s, mod);
        split2(s, mod);
    }

    /**
     * Splits the GB2312 encoding of {@code s} into consecutive pieces of
     * {@code mod} bytes (the last piece may be shorter) and prints each piece
     * on its own line. Multi-byte characters are NOT protected: one that
     * crosses a piece boundary is decoded as garbage on both sides.
     *
     * @param s   the string to split; must not be {@code null}
     * @param mod number of bytes per piece; must be positive
     * @throws IllegalArgumentException if {@code mod} is not positive
     */
    public void split1(String s, int mod)
    {
        // A zero size would never advance through the input; a negative one is meaningless.
        requirePositiveChunkSize(mod);
        System.out.println("不除掉乱码:split1(String,int)");
        List<String> pieces = new ArrayList<String>();
        try
        {
            byte[] encoded = s.getBytes("gb2312");
            int from = 0;
            while (from < encoded.length)
            {
                // The final piece holds whatever is left when fewer than mod bytes remain.
                int length = Math.min(mod, encoded.length - from);
                pieces.add(new String(encoded, from, length, "gb2312"));
                from += length;
            }
            for (String piece : pieces)
            {
                System.out.println(piece);
            }
        }
        catch (UnsupportedEncodingException ex)
        {
            // Best effort: report the missing charset and print nothing else.
            System.out.println("no gb2312 ");
        }
    }

    /**
     * Splits the GBK encoding of {@code stringSrc} into pieces of at most
     * {@code mod} bytes without ever tearing a two-byte character.
     *
     * <p>Single-byte characters are appended to the current piece; a piece is
     * emitted as soon as it holds exactly {@code mod} bytes. A two-byte
     * character is appended only when at least two bytes of room are left;
     * otherwise it is dropped, and a numbered notice naming the dropped
     * character is printed immediately. After the whole input is consumed, a
     * non-empty partial piece is emitted as the last piece. Finally every piece
     * is printed on its own line.
     *
     * @param stringSrc the string to split; must not be {@code null}
     * @param mod       maximum number of bytes per piece; must be positive
     * @throws IllegalArgumentException if {@code mod} is not positive
     */
    public void split2(String stringSrc, int mod)
    {
        requirePositiveChunkSize(mod);
        System.out.println("除掉乱码:splist2(String,int)");
        List<String> pieces = new ArrayList<String>();
        try
        {
            byte[] encoded = stringSrc.getBytes("gbk");
            byte[] piece = new byte[mod];
            int used = 0;      // bytes currently stored in piece
            int dropped = 0;   // number of two-byte characters discarded so far
            int pos = 0;
            while (pos < encoded.length)
            {
                // A lead byte only starts a two-byte character if its trail byte exists.
                boolean twoBytes = isGbkLeadByte(encoded[pos]) && pos + 1 < encoded.length;
                if (twoBytes && used >= mod - 1)
                {
                    // Fewer than two free bytes remain: the character would straddle
                    // the boundary, so it is discarded instead of being torn.
                    dropped++;
                    System.out.println("除掉的汉字:" + dropped + " " + new String(encoded, pos, 2, "gbk"));
                    pos += 2;
                    continue;
                }
                int width = twoBytes ? 2 : 1;
                System.arraycopy(encoded, pos, piece, used, width);
                used += width;
                pos += width;
                if (used == mod)
                {
                    pieces.add(new String(piece, 0, used, "gbk"));
                    used = 0;
                }
            }
            // Flush the remainder, but never emit an empty piece when the input
            // ended exactly on a piece boundary.
            if (used > 0)
            {
                pieces.add(new String(piece, 0, used, "gbk"));
            }
        }
        catch (UnsupportedEncodingException ex)
        {
            // Best effort: report the missing charset; pieces stays empty.
            System.out.println("no gbk");
        }
        for (String result : pieces)
        {
            System.out.println(result);
        }
    }

    /** Rejects piece sizes that cannot hold at least one byte. */
    private static void requirePositiveChunkSize(int mod)
    {
        if (mod <= 0)
        {
            throw new IllegalArgumentException("mod must be positive: " + mod);
        }
    }

    /** Tells whether {@code b} is in the GBK lead-byte range 0x81..0xFE. */
    private static boolean isGbkLeadByte(byte b)
    {
        int unsigned = b & 0xFF;
        return unsigned >= 0x81 && unsigned <= 0xFE;
    }

    /** Runs the demo on a fixed sample string with six-byte pieces. */
    public static void main(String[] args)
    {
        String s = "a=我,人e们e为中以35经4产1人ie为joe经1发pl";
        new GoodSplitString(s, 6);
    }
}
测试结果为:
原始字符串为: a=我,人e们e为中以35经4产1人ie为joe经1发pl
不除掉乱码:split1(String,int)
a=我,?
?e们e?
?中以3
5经4产
1人ie?
?joe经
1发pl
除掉乱码:splist2(String,int)
除掉的汉字:1 人
除掉的汉字:2 中
除掉的汉字:3 以
除掉的汉字:4 为
除掉的汉字:5 发
a=我,e
们e为3
5经4产
1人iej
oe经1p
l