用Java实现按字节长度截取中英文数字字符串的方法总结

最新推荐文章于 2024-06-29 03:22:52 发布

stefshawn

最新推荐文章于 2024-06-29 03:22:52 发布

阅读量1.1k

点赞数

分类专栏： JavaSE 文章标签： java byte null string

JavaSE 专栏收录该内容

20 篇文章 0 订阅

订阅专栏

方法一

//jdk1.4.2.05

    Java代码   
    
  
 /** 
  * @author cn 
  * @param s 要截取的字符串 
  * @param length 要截取字符串的长度->是字节一个汉字2个字节 
  * return 返回length长度的字符串（含汉字） 
 */  
 private static String getTitleToTen(String s, int length) throws Exception  
    {  
   
        byte[] bytes = s.getBytes("Unicode");  
        int n = 0;  
        int i = 2;  
        for (; i < bytes.length && n < length; i++){  
         if (i % 2 == 0){  
                n++;  
            }else{  
                if (bytes[i] != 0){  
                    n++;  
                }  
            }  
        }  
        /*if (i % 2 == 1){ 
            if (bytes[i - 1] == 0) 
                i = i - 1; 
            else 
                i = i + 1; 
        }*/  
        //将截一半的汉字要保留  
        if (i % 2 == 1){  
         i = i + 1;  
        }  
        String eside = ".................................................................";  
        byte[] byteEside = eside.getBytes("Unicode");  
        String title = "";  
        if (bytes[i-1] == 0){  
         title = new String(bytes, 0, i, "Unicode")+new String(byteEside,0,40,"Unicode");  
        }else{  
         title = new String(bytes, 0, i, "Unicode")+new String(byteEside,0,38,"Unicode");  
        }  
        return title;  
    }  

//方法改进

    Java代码   
    
  
 /** 
      * @author cn 
      * @param s 要截取的字符串 
      * @param length 要截取字符串的长度->是字节一个汉字2个字节 
      * return 返回length长度的字符串（含汉字） 
     */  
     private static String getTitleToTen(String s, int length) throws Exception  
     {  
   
         String title = "";  
         s = s.trim();  
         byte[] bytes = s.getBytes("Unicode");  
         int n = 0;  
         int i = 2;  
         int chineseNum = 0;  
         int englishNum = 0;  
         for (; i < bytes.length && n < length; i++){  
             if (i % 2 == 0){  
                 n++;  
             }else{  
                 if (bytes[i] != 0){  
                     n++;  
                     chineseNum++;  
                 }else{  
                     englishNum++;  
                 }  
             }  
         }  
         /*if (i % 2 == 1){ 
             if (bytes[i - 1] == 0) 
                 i = i - 1; 
             else 
                 i = i + 1; 
         }*/  
         //将截一半的汉字要保留  
         if (i % 2 == 1){  
             i = i + 1;  
         }  
     //最后一个为非汉字则英文字符加一  
         if (bytes[i-1] == 0){  
             englishNum++;  
                  
         }else if (englishNum % 2 != 0){//如果英文字符mod 2 ！= 0 代表有奇数个英文字符，所以汉字个数加一  
             chineseNum++;  
         }  
         String eside = ".................................................................";  
         String str = new String(bytes,0,i,"Unicode");  
         StringBuffer ssss = new StringBuffer(str);  
         ssss.append(eside);  
         byte[] byteTitle = ssss.toString().getBytes("Unicode");  
         int lll = (length*4-4)-2*chineseNum;//length截取字符串字节数（length*2）*（length*2）[length*2]代表参数s,和length转换成bytes[] 的字节数  
         title = new String(byteTitle,0,lll,"Unicode");  
         return title;  
     }  

//jdk1.6.0.06（该版本下 getBytes("Unicode") 得到的字节序与上面 jdk1.4.2 的代码所假设的相反：每个字符高位字节在前，因此下面代码中的奇偶判断与方法一对调）

    Java代码   
    
  
 /** 
  * @author cn 
  * @param s 要截取的字符串 
  * @param length 要截取字符串的长度->是字节一个汉字2个字节 
  * return 返回length长度的字符串（含汉字） 
 */  
    public static String bSubstring(String s, int length) throws Exception  
    {  
   
        byte[] bytes = s.getBytes("Unicode");  
        int n = 0; // 表示当前的字节数  
        int i = 2; // 要截取的字节数，从第3个字节开始  
        for (; i < bytes.length && n < length; i++){  
            // 奇数位置，如3、5、7等，为UCS2编码中两个字节的第二个字节  
            if (i % 2 == 1){  
                n++; // 在UCS2第二个字节时n加1  
            }  
            else{  
                // 当UCS2编码的第一个字节不等于0时，该UCS2字符为汉字，一个汉字算两个字节  
                if (bytes[i] != 0){  
                    n++;  
                }  
            }  
              
        }  
        // 如果i为奇数时，处理成偶数  
        /*if (i % 2 == 1){ 
            // 该UCS2字符是汉字时，去掉这个截一半的汉字 
            if (bytes[i - 1] != 0) 
                i = i - 1; 
            // 该UCS2字符是字母或数字，则保留该字符 
            else 
                i = i + 1; 
        }*/  
        //将截一半的汉字要保留  
        if (i % 2 == 1){  
         i = i + 1;  
        }  
        return new String(bytes, 0, i, "Unicode");  
    }  

方法二

    Java代码   
    
  
 package proc;  
   
 /**
  * String helpers for cutting text to a byte budget measured in the platform
  * default charset.
  */
 public class Tools {
     public Tools() {
     }

     /**
      * Cuts a string to at most {@code len} bytes and appends {@code "......."}.
      *
      * @param str the original string
      * @param len the byte budget
      * @return the cut string with the default ellipsis; see
      *         {@link #splitString(String, int, String)} for the exact rules
      * @author kinglong
      * @since 2006.07.20
      */
     public static String splitString(String str, int len) {
         return splitString(str, len, ".......");
     }

     /**
      * Cuts a string to at most {@code len} bytes, measured in the platform default
      * charset, without ever cutting inside a character.
      *
      * <p>Whole characters are kept while they fit in the budget; a character that
      * would exceed it is dropped, unless it is the very first one, in which case
      * it is kept so the result is not empty. The trimmed {@code elide} is appended
      * to a cut string. The string is returned unchanged, without {@code elide},
      * when {@code len} is below 1 or the whole string already fits.
      *
      * <p>The earlier implementation counted negative bytes and assumed they come
      * in pairs. That only describes two-byte encodings; with UTF-8 as the default
      * charset it could cut inside a character and return a replacement character.
      * Each character's byte length is now measured on its own, which assumes the
      * default charset encodes characters independently of their neighbours.
      *
      * @param str the original string; {@code null} yields an empty string
      * @param len the byte budget
      * @param elide the ellipsis to append after trimming; {@code null} appends nothing
      * @return the cut string followed by the ellipsis, or {@code str} itself when
      *         no cut is needed
      * @author kinglong
      * @since 2006.07.20
      */
     public static String splitString(String str, int len, String elide) {
         if (str == null) {
             return "";
         }
         if (len < 1 || len >= str.getBytes().length) {
             return str;
         }

         int usedBytes = 0;
         int end = 0; // exclusive end index of the kept prefix
         while (end < str.length()) {
             int next = nextCharEnd(str, end);
             int charBytes = str.substring(end, next).getBytes().length;
             if (usedBytes + charBytes > len) {
                 break;
             }
             usedBytes += charBytes;
             end = next;
         }
         if (end == 0) {
             // The first character alone is wider than the budget: keep it whole.
             end = nextCharEnd(str, 0);
         }

         String suffix = (elide == null) ? "" : elide.trim();
         return str.substring(0, end) + suffix;
     }

     /** Returns the index just past the character (or surrogate pair) that starts at {@code index}. */
     private static int nextCharEnd(String str, int index) {
         int next = index + 1;
         if (next < str.length()
                 && Character.isHighSurrogate(str.charAt(index))
                 && Character.isLowSurrogate(str.charAt(next))) {
             next++;
         }
         return next;
     }

     /**
      * Prints a sample string cut to 20 bytes.
      *
      * @param args unused
      */
     public static void main(String[] args) {
         String s = "aaas学位英语专区 学s位英语专区 学s位英语专区 学位英语专区";
         System.out.println(splitString(s, 20));
     }
 }

方法三

    Java代码   
    
  
 /** 
     　　 * 取字符串的前toCount个字符 
     　　 * 
     　　 * @param str 被处理字符串 
     　　 * @param toCount 截取长度 
     　　 * @param more 后缀字符串 
     　　 * @version 2004.11.24 
     　　 * @author zhulx 
     　　 * @return String 
     　　 */  
         public static String substring(String str, int toCount,String more)throws Exception{  
             int reInt = 0;  
             String reStr = "";  
             if (str == null) return "";  
             char[] tempChar = str.toCharArray();  
             for (int kk = 0; (kk < tempChar.length && toCount > reInt); kk++) {  
                 String s1 = str.valueOf(tempChar[kk]);  
                 byte[] b = s1.getBytes();  
                 reInt += b.length;  
                 reStr += tempChar[kk];  
             }  
             if (toCount == reInt || (toCount == reInt - 1))  
                 reStr += more;  
             return reStr;  
             }