UTF-8转成GBK 之多种方法

最新推荐文章于 2023-04-19 19:32:05 发布
weixin_34138255
最新推荐文章于 2023-04-19 19:32:05 发布
阅读量363
点赞数
文章标签： python java c#
原文链接：http://www.cnblogs.com/liushisaonian/p/9894633.html
版权
	/**
	 * Extracts the tail of {@code str} that follows its {@code n}-th {@code '?'} and logs
	 * the outcome of six charset-conversion experiments run on that tail.
	 *
	 * <p>The conversion results are only written to the log; the value returned is the
	 * unconverted tail.
	 *
	 * @param str text to scan
	 * @param n   number of {@code '?'} matches to step over
	 * @return {@code str} itself when the scan runs out of {@code '?'} matches, otherwise
	 *         the substring beginning two positions after the last match; {@code null}
	 *         when an exception escapes the main flow (for example a {@code null} input
	 *         or a start index past the end of the string)
	 * @author sunshaofeng
	 * @date 2018-9-18 15:45
	 * @version 1.0
	 */
	private static String getStr(String str, int n) {
		try {
			// NOTE(review): the first search starts at index 1, so a '?' sitting at
			// index 0 is never counted -- confirm this is intended.
			int markPos = 0;
			for (int seen = 0; seen < n; seen++) {
				markPos = str.indexOf("?", markPos + 1);
				if (markPos == -1) {
					return str;
				}
			}
			// NOTE(review): "+ 2" also drops the character right after the '?' -- confirm.
			String tail = str.substring(markPos + 2);

			// Report which charset name the round-trip probe picks for the tail.
			String encoding = getEncoding(tail);
			logger.info("encoding :"+encoding);
			logger.info("ENCODE BEFORE :"+tail);

			// Approach one
			try {
				String first = gbk2Utf(tail);
				logger.info("gbk2Utf encodeToGBK ONE:"+first);
			} catch (Exception e) {
				e.printStackTrace();
			}

			// Approach two
			try {
				byte[] utf8Bytes = getUTF8BytesFromGBKString(tail);
				String second = new String(utf8Bytes, "UTF-8");
				logger.info("gbk2Utf encodeToGBK TWO:"+second);
			} catch (Exception e) {
				e.printStackTrace();
			}

			// Approach three
			try {
				String third = charsetConvert(tail,"UTF-8");
				logger.info("gbk2Utf encodeToGBK THREE:"+third);
			} catch (Exception e) {
				e.printStackTrace();
			}

			// Approach four: escape to ASCII first, then unescape again
			try {
				String escaped = gbkToUnicode(tail);
				String fourth = unicodeToUtf8(escaped);
				logger.info("gbk2Utf encodeToGBK FOUR:"+fourth);

				String encodingAfter = getEncoding(fourth);
				logger.info("encoding After:"+encodingAfter);
			} catch (Exception e) {
				e.printStackTrace();
			}

			// Approach five: encode as GB2312 bytes, decode those bytes as UTF-8
			try {
				byte[] gb2312Bytes = tail.getBytes("GB2312");
				String fifth = new String(gb2312Bytes,"UTF-8");
				logger.info("gbk2Utf encodeToGBK FIVE:"+fifth);
			} catch (Exception e) {
				e.printStackTrace();
			}

			// Approach six: percent-encode the UTF-8 form
			try {
				String sixth = URLEncoder.encode(tail, "UTF-8");
				logger.info("gbk2Utf encodeToGBK 6:"+sixth);
			} catch (Exception e) {
				e.printStackTrace();
			}

			return tail;
		} catch (Exception e) {
			e.printStackTrace();
			return null;
		}
	}
	/**
	 * GBK to UTF-8, approach one: encodes the string as UTF-8 bytes and decodes those
	 * bytes back into a string.
	 *
	 * <p>A Java {@code String} is already a sequence of UTF-16 code units, so for any
	 * well-formed input the result equals the input.
	 *
	 * @param gbk text to round-trip through UTF-8
	 * @return the text decoded from its UTF-8 byte form
	 * @throws UnsupportedEncodingException if the runtime does not know the charset name
	 *         {@code "UTF-8"}
	 */
	private static String gbk2Utf(String gbk) throws UnsupportedEncodingException {
		// UTF-8 is variable-width (1 to 4 bytes per code point). Forcing every char
		// into a fixed 3-byte layout produces overlong, invalid sequences for anything
		// below U+0800, so the JDK encoder is used instead.
		byte[] utf8 = gbk.getBytes("UTF-8");
		// Decode as a UTF-8 consumer would, to mimic display on a UTF-8 site.
		return new String(utf8, "UTF-8");
	}
	 /**
	  * GBK to UTF-8, approach two: returns the UTF-8 byte encoding of a string.
	  *
	  * <p>The encoding is delegated to the JDK so that every code point gets its
	  * correct width: 1 byte for ASCII, 2 bytes up to U+07FF, 3 bytes for the rest of
	  * the Basic Multilingual Plane, and 4 bytes for surrogate pairs.
	  *
	  * @param gbkStr text to encode; must not be {@code null}
	  * @return a newly allocated array holding exactly the UTF-8 bytes of {@code gbkStr}
	  */
	 public static byte[] getUTF8BytesFromGBKString(String gbkStr) {
	     return gbkStr.getBytes(java.nio.charset.StandardCharsets.UTF_8);
	 }
	 /**
	  * GBK to UTF-8, approach three: encodes the string in the given charset, passes the
	  * bytes through a Base64 encode/decode hop, and decodes them with the same charset.
	  *
	  * <p>Base64 encoding followed by decoding returns the original bytes, so the hop does
	  * not alter the data; the result is the string as it survives encoding in
	  * {@code charset}.
	  *
	  * @param str     text to convert
	  * @param charset name of the charset used for both encoding and decoding
	  * @return the re-decoded text, or {@code str} unchanged when {@code charset} is not
	  *         supported (the failure is printed to standard error)
	  */
	 private static String charsetConvert(String str, String charset) {
	     try {
	         // java.util.Base64 is the supported API; the sun.misc Base64 classes are
	         // JDK internals and are absent from current JDKs.
	         String base64 = java.util.Base64.getEncoder().encodeToString(str.getBytes(charset));
	         byte[] bytes = java.util.Base64.getDecoder().decode(base64);
	         return new String(bytes, charset);
	     } catch (IOException e) {
	         // getBytes/new String report an unknown charset name through
	         // UnsupportedEncodingException, a subclass of IOException.
	         e.printStackTrace();
	         return str;
	     }
	 }
	 
	 
	 	/**
		 * Reports the first charset, out of a fixed candidate list, in which the string
		 * survives an encode-then-decode round trip unchanged.
		 *
		 * <p>Candidates are tried in this order: GB2312, ISO-8859-1, UTF-8, GBK. This is
		 * a representability probe on an already-decoded string; it does not inspect any
		 * byte source.
		 *
		 * @param str text to probe
		 * @return the name of the first candidate that round-trips {@code str}, or an
		 *         empty string when none does (any exception raised while probing,
		 *         including one caused by a {@code null} argument, is swallowed)
		 */
		public static String getEncoding(String str) {
			final String[] candidates = {"GB2312", "ISO-8859-1", "UTF-8", "GBK"};
			for (String candidate : candidates) {
				try {
					String roundTripped = new String(str.getBytes(candidate), candidate);
					if (str.equals(roundTripped)) {
						return candidate;
					}
				} catch (Exception ignored) {
					// Best effort: a failing candidate is skipped and the next one is tried.
				}
			}
			return "";
		}
		/**
		 * Escapes every character above U+00FF as a backslash, the letter {@code u} and
		 * exactly four lowercase hex digits; characters up to U+00FF are copied as is.
		 *
		 * <p>The four-digit zero padding matters: {@code unicodeToUtf8} consumes exactly
		 * four hex digits per escape, so a shorter escape would swallow the characters
		 * that follow it.
		 *
		 * <p>NOTE(review): a literal backslash in the input is copied unescaped, so text
		 * that already contains backslashes does not round-trip through
		 * {@code unicodeToUtf8} -- confirm whether callers rely on that.
		 *
		 * @param str text to escape; must not be {@code null}
		 * @return the escaped text
		 */
		public static String gbkToUnicode(String str) {
			StringBuilder result = new StringBuilder(str.length());
			for (int i = 0; i < str.length(); i++) {
				char ch = str.charAt(i);
				if (ch <= 0x00FF) {
					result.append(ch);
					continue;
				}
				String hex = Integer.toHexString(ch);
				result.append("\\u");
				// Integer.toHexString drops leading zeros; restore them up to 4 digits.
				for (int pad = hex.length(); pad < 4; pad++) {
					result.append('0');
				}
				result.append(hex);
			}
			return result.toString();
		}
	    /**
	     * Expands backslash escapes in a string.
	     *
	     * <p>A backslash followed by {@code u} and four hex digits becomes the character
	     * with that code unit. A backslash followed by {@code t}, {@code r}, {@code n} or
	     * {@code f} becomes tab, carriage return, line feed or form feed. A backslash
	     * followed by any other character yields that character. Everything else is
	     * copied unchanged.
	     *
	     * @param theString text containing escapes; must not be {@code null}
	     * @return the text with all escapes expanded
	     * @throws IllegalArgumentException if a hex escape has a non-hex digit or fewer
	     *         than four digits, or if the input ends with a lone backslash
	     */
	    public static String unicodeToUtf8(String theString) {
	        int len = theString.length();
	        StringBuilder out = new StringBuilder(len);
	        int pos = 0;
	        while (pos < len) {
	            char current = theString.charAt(pos++);
	            if (current != '\\') {
	                out.append(current);
	                continue;
	            }
	            // Fail with a descriptive error instead of running off the end.
	            if (pos >= len) {
	                throw new IllegalArgumentException(
	                        "Dangling backslash at end of input.");
	            }
	            char marker = theString.charAt(pos++);
	            if (marker == 'u') {
	                if (len - pos < 4) {
	                    throw new IllegalArgumentException(
	                            "Malformed   \\uxxxx   encoding.");
	                }
	                int value = 0;
	                for (int i = 0; i < 4; i++) {
	                    char hex = theString.charAt(pos++);
	                    int digit;
	                    if (hex >= '0' && hex <= '9') {
	                        digit = hex - '0';
	                    } else if (hex >= 'a' && hex <= 'f') {
	                        digit = 10 + hex - 'a';
	                    } else if (hex >= 'A' && hex <= 'F') {
	                        digit = 10 + hex - 'A';
	                    } else {
	                        throw new IllegalArgumentException(
	                                "Malformed   \\uxxxx   encoding.");
	                    }
	                    value = (value << 4) + digit;
	                }
	                out.append((char) value);
	            } else if (marker == 't') {
	                out.append('\t');
	            } else if (marker == 'r') {
	                out.append('\r');
	            } else if (marker == 'n') {
	                out.append('\n');
	            } else if (marker == 'f') {
	                out.append('\f');
	            } else {
	                // Unknown escape: keep the escaped character itself.
	                out.append(marker);
	            }
	        }
	        return out.toString();
	    }
转载于:https://www.cnblogs.com/liushisaonian/p/9894633.html
weixin_34138255
关注
0
点赞
踩
0

收藏

觉得还不错? 一键收藏
0
评论
UTF-8转成GBK 之多种方法

/** * <p>Description:获取字符串后的第二个?后的字符串</p> * @author sunshaofeng * @date 2018-9-18 15:45 * @return * @version 1.0 */ private static String getStr(String str, int ...
复制链接

扫一扫