/**
 * Splits a string into segments of a fixed length and pads the result with empty strings so that
 * it always holds at least a minimum number of entries. The returned size is therefore the larger
 * of the actual segment count and the minimum segment count.
 *
 * @param srcString the text to split; when {@code null} or empty the result consists only of
 *        empty strings (one per minimum segment)
 * @param seg_and_len optional settings, read by position:
 *        <ol>
 *          <li>minimum segment count - only honoured when greater than 20, otherwise 20 is used;</li>
 *          <li>segment length - only honoured when greater than 0, otherwise 3800 is used;</li>
 *          <li>split mode - {@code 0} splits by {@code char} count ({@link String#length()} units),
 *              any other value splits by byte length via
 *              {@code subStringSplitByByte(srcString, segmentLength)}.</li>
 *        </ol>
 *        Missing positions fall back to the defaults [20; 3800; 1]. A {@code null} array is
 *        treated as "no settings", and values beyond the third are ignored.
 * @return a mutable list containing the segments in order, followed by as many empty strings as
 *         are needed to reach the minimum segment count
 */
public static List<String> splitXmlString(String srcString, int... seg_and_len){
    final int baseSegments = 20;
    final int baseLength = 3800;

    int minSegments = baseSegments;
    int segmentLength = baseLength;
    boolean splitByByte = true;

    // Each position is validated on its own, so an explicit null array or extra trailing
    // values no longer crash or discard the settings that were supplied.
    int optionCount = (seg_and_len == null) ? 0 : seg_and_len.length;
    if (optionCount >= 1 && seg_and_len[0] > baseSegments) {
        minSegments = seg_and_len[0];
    }
    if (optionCount >= 2 && seg_and_len[1] > 0) {
        segmentLength = seg_and_len[1];
    }
    if (optionCount >= 3 && seg_and_len[2] == 0) {
        splitByByte = false;
    }

    List<String> retList;
    if (srcString == null || srcString.isEmpty()) {
        // Nothing to split: the padding loop below produces minSegments empty strings.
        retList = new ArrayList<String>();
    } else if (splitByByte) {
        retList = subStringSplitByByte(srcString, segmentLength);
    } else {
        retList = new ArrayList<String>();
        int total = srcString.length();
        int from = 0;
        while (from < total) {
            // The last segment simply takes whatever is left.
            int to = from + Math.min(segmentLength, total - from);
            retList.add(srcString.substring(from, to));
            from = to;
        }
    }

    // Pad with empty strings up to the minimum segment count.
    while (retList.size() < minSegments) {
        retList.add("");
    }
    return retList;
}
/**
 * Splits {@code src} into consecutive pieces whose GBK encoding occupies at most
 * {@code byte_len} bytes each, without ever cutting a character in half. Concatenating the
 * returned pieces in order yields {@code src} again.
 *
 * <p>Widths are measured per character rather than by inspecting the sign of encoded bytes:
 * the trail byte of a double-byte GBK character may be a non-negative value (0x40-0x7E), so a
 * sign test cannot reliably tell single-byte from double-byte characters. Characters that GBK
 * cannot represent are measured by the length of whatever the encoder emits for them.
 *
 * @param src the text to split; {@code null} or empty yields an empty list
 * @param byte_len maximum number of GBK bytes per piece; values below 2 fall back to
 *        {@code src.length()} as the limit
 * @return a mutable list of pieces; a piece exceeds the limit only when a single character is
 *         by itself wider than the limit (every piece holds at least one character)
 * @throws java.nio.charset.UnsupportedCharsetException if the running JVM does not provide the
 *         GBK charset
 */
public static List<String> subStringSplitByByte(String src, int byte_len){
    List<String> pieces = new ArrayList<String>();
    if (src == null || src.isEmpty()) {
        return pieces;
    }

    // Legacy fallback kept for compatibility: a limit below 2 is replaced by the char count.
    int limit = (byte_len < 2) ? src.length() : byte_len;
    java.nio.charset.Charset gbk = java.nio.charset.Charset.forName("GBK");

    int pieceStart = 0;   // index of the first char of the piece being built
    int pieceBytes = 0;   // GBK bytes accumulated in the piece being built
    int index = 0;
    while (index < src.length()) {
        // Advance by whole code points so surrogate pairs stay together.
        int next = index + Character.charCount(src.codePointAt(index));
        int width = (src.charAt(index) < 0x80)
                ? 1
                : src.substring(index, next).getBytes(gbk).length;

        // Close the current piece when this character would overflow it; the
        // "index > pieceStart" guard guarantees progress and prevents empty pieces.
        if (index > pieceStart && width > limit - pieceBytes) {
            pieces.add(src.substring(pieceStart, index));
            pieceStart = index;
            pieceBytes = 0;
        }
        pieceBytes += width;
        index = next;
    }
    // src is non-empty here, so there is always a final piece to emit.
    pieces.add(src.substring(pieceStart));
    return pieces;
}