/**
 * Extracts the tail of {@code str} that follows the {@code n}-th question mark and logs
 * several experimental re-encodings of that tail for diagnostic purposes.
 *
 * <p>The search for each {@code '?'} starts at index 1, so a question mark at index 0 is
 * never counted. The returned tail begins two characters after the {@code n}-th question
 * mark, i.e. the character directly following the question mark is skipped as well. When
 * {@code n} is zero or negative no search takes place and the tail starts at index 2.
 *
 * <p>The six conversion attempts below only write to the log; none of their results
 * influences the return value.
 *
 * <p>Originally written by sunshaofeng on 2018-09-18.
 *
 * @param str the text to search; may be {@code null}
 * @param n   the number of question marks to skip past
 * @return {@code str} itself when fewer than {@code n} question marks are found; otherwise
 *         the tail described above; {@code null} when {@code str} is {@code null}, when the
 *         tail would start beyond the end of {@code str}, or when the diagnostic logging
 *         fails unexpectedly
 */
private static String getStr(String str, int n) {
    // Explicit guard: a null input yields null without going through an exception.
    if (str == null) {
        return null;
    }

    // Walk to the n-th '?'; each search resumes one past the previous hit (or past index 0).
    int s = 0;
    for (int i = 0; i < n; i++) {
        s = str.indexOf("?", s + 1);
        if (s == -1) {
            return str;
        }
    }

    // The tail starts two characters after the match; bail out if that is out of range.
    int start = s + 2;
    if (start > str.length()) {
        return null;
    }
    String ss = str.substring(start);

    try {
        // Report the guessed encoding and the raw tail.
        String encoding = getEncoding(ss);
        logger.info("encoding :"+encoding);
        logger.info("ENCODE BEFORE :"+ss);

        // Approach one
        try {
            String gbkStyle = gbk2Utf(ss);
            logger.info("gbk2Utf encodeToGBK ONE:"+gbkStyle);
        } catch (Exception e) {
            e.printStackTrace();
        }

        // Approach two
        try {
            String gbkStyle2 = new String(getUTF8BytesFromGBKString(ss), "UTF-8");
            logger.info("gbk2Utf encodeToGBK TWO:"+gbkStyle2);
        } catch (Exception e) {
            e.printStackTrace();
        }

        // Approach three
        try {
            String gbkStyle3 = charsetConvert(ss,"UTF-8");
            logger.info("gbk2Utf encodeToGBK THREE:"+gbkStyle3);
        } catch (Exception e) {
            e.printStackTrace();
        }

        // Approach four
        try {
            String gbkStyle41 = gbkToUnicode(ss);
            String gbkStyle42 = unicodeToUtf8(gbkStyle41);
            logger.info("gbk2Utf encodeToGBK FOUR:"+gbkStyle42);
            String encoding1 = getEncoding(gbkStyle42);
            logger.info("encoding After:"+encoding1);
        } catch (Exception e) {
            e.printStackTrace();
        }

        // Approach five: encode as GB2312 bytes, then decode those bytes as UTF-8.
        try {
            String gbkStyle5 = new String(ss.getBytes("GB2312"),"UTF-8");
            logger.info("gbk2Utf encodeToGBK FIVE:"+gbkStyle5);
        } catch (Exception e) {
            e.printStackTrace();
        }

        // Approach six: percent-encode the tail as UTF-8.
        try {
            String gbkStyle6 = URLEncoder.encode (ss, "UTF-8" );
            logger.info("gbk2Utf encodeToGBK 6:"+gbkStyle6);
        } catch (Exception e) {
            e.printStackTrace();
        }

        return ss;
    } catch (Exception e) {
        // Only reachable if the encoding guess or the logger itself fails.
        e.printStackTrace();
    }
    return null;
}
/**
 * "GBK to UTF-8", approach one: encodes the characters of {@code gbk} as UTF-8 bytes and
 * decodes those bytes back into a string.
 *
 * <p>The argument is an ordinary Java string, so for well-formed text the result equals the
 * input. Characters that UTF-8 cannot represent (unpaired surrogates) are not preserved.
 *
 * <p>The JDK encoder is used on purpose: characters below U+0800 need one- or two-byte
 * sequences, and forcing every character into a three-byte sequence produces overlong
 * forms that the UTF-8 decoder rejects.
 *
 * @param gbk the text to round-trip; must not be {@code null}
 * @return the text obtained after encoding to and decoding from UTF-8
 * @throws UnsupportedEncodingException if the runtime does not provide UTF-8
 */
private static String gbk2Utf(String gbk) throws UnsupportedEncodingException {
    byte[] utf8Bytes = gbk.getBytes("UTF-8");
    // Decode again to simulate how a UTF-8 consumer would display the bytes.
    return new String(utf8Bytes, "UTF-8");
}
/**
 * "GBK to UTF-8", approach two: hand-encodes the characters of {@code gbkStr} as UTF-8.
 *
 * <p>Each code point is written with the shortest valid form: one byte below U+0080, two
 * bytes below U+0800, three bytes below U+10000 and four bytes for supplementary code
 * points (surrogate pairs). An unpaired surrogate is written as a three-byte sequence of
 * its own code unit.
 *
 * @param gbkStr the text to encode; must not be {@code null}
 * @return a new array holding exactly the encoded bytes
 */
public static byte[] getUTF8BytesFromGBKString(String gbkStr) {
    int n = gbkStr.length();
    // Worst case is three bytes per UTF-16 unit; a surrogate pair (two units) needs four.
    byte[] utfBytes = new byte[3 * n];
    int k = 0;
    for (int i = 0; i < n; ) {
        int cp = gbkStr.codePointAt(i);
        i += Character.charCount(cp);
        if (cp < 0x80) {
            utfBytes[k++] = (byte) cp;
        } else if (cp < 0x800) {
            utfBytes[k++] = (byte) (0xc0 | (cp >> 6));
            utfBytes[k++] = (byte) (0x80 | (cp & 0x3f));
        } else if (cp < 0x10000) {
            utfBytes[k++] = (byte) (0xe0 | (cp >> 12));
            utfBytes[k++] = (byte) (0x80 | ((cp >> 6) & 0x3f));
            utfBytes[k++] = (byte) (0x80 | (cp & 0x3f));
        } else {
            utfBytes[k++] = (byte) (0xf0 | (cp >> 18));
            utfBytes[k++] = (byte) (0x80 | ((cp >> 12) & 0x3f));
            utfBytes[k++] = (byte) (0x80 | ((cp >> 6) & 0x3f));
            utfBytes[k++] = (byte) (0x80 | (cp & 0x3f));
        }
    }
    // Trim the scratch buffer down to the bytes actually written.
    if (k < utfBytes.length) {
        return java.util.Arrays.copyOf(utfBytes, k);
    }
    return utfBytes;
}
/**
 * "GBK to UTF-8", approach three: encodes {@code str} in the named charset and decodes the
 * resulting bytes with that same charset.
 *
 * <p>No intermediate Base64 step is performed: encoding bytes to Base64 and decoding them
 * straight back yields the same bytes, and the internal {@code sun.misc} Base64 classes are
 * not available on current JDKs.
 *
 * @param str     the text to convert; must not be {@code null}
 * @param charset the name of the charset used for both encoding and decoding
 * @return the round-tripped text, or {@code str} unchanged when the charset is not supported
 */
private static String charsetConvert(String str, String charset) {
    try {
        byte[] bytes = str.getBytes(charset);
        return new String(bytes, charset);
    } catch (UnsupportedEncodingException e) {
        // Best effort: report the problem and hand the input back untouched.
        e.printStackTrace();
        return str;
    }
}
/**
 * Guesses a charset name for {@code str}.
 *
 * <p>The candidates GB2312, ISO-8859-1, UTF-8 and GBK are tried in that order; the first one
 * for which encoding the string and decoding the bytes again reproduces the string is
 * returned. The check therefore only shows that every character is representable in the
 * charset, not how the text was originally encoded.
 *
 * @param str the text to examine
 * @return the name of the first candidate that round-trips {@code str}, or an empty string
 *         when none does (including when {@code str} is {@code null})
 */
public static String getEncoding(String str) {
    final String[] candidates = {"GB2312", "ISO-8859-1", "UTF-8", "GBK"};
    for (String candidate : candidates) {
        try {
            String roundTripped = new String(str.getBytes(candidate), candidate);
            if (str.equals(roundTripped)) {
                return candidate;
            }
        } catch (Exception ignored) {
            // Any failure for this candidate just means "not this one"; try the next.
        }
    }
    return "";
}
/**
 * Rewrites every character above U+00FF as an escape made of a backslash, the letter
 * {@code u} and exactly four lowercase hexadecimal digits; all other characters are copied
 * unchanged.
 *
 * <p>The digits are zero-padded so that a reader consuming a fixed four digits per escape
 * parses characters in the U+0100..U+0FFF range correctly. Backslashes already present in
 * the input are copied as they are, without escaping.
 *
 * @param str the text to escape; must not be {@code null}
 * @return the escaped text
 */
public static String gbkToUnicode(String str) {
    StringBuilder result = new StringBuilder(str.length());
    for (int i = 0; i < str.length(); i++) {
        char ch = str.charAt(i);
        if (ch <= 0x00FF) {
            result.append(ch);
            continue;
        }
        String hex = Integer.toHexString(ch);
        result.append("\\u");
        // Left-pad to four digits; toHexString drops leading zeros.
        for (int pad = hex.length(); pad < 4; pad++) {
            result.append('0');
        }
        result.append(hex);
    }
    return result.toString();
}
/**
 * Decodes backslash escapes in {@code theString} into the characters they denote.
 *
 * <p>A backslash followed by the letter {@code u} and four hexadecimal digits (either case)
 * becomes the character with that code unit. A backslash followed by {@code t}, {@code r},
 * {@code n} or {@code f} becomes tab, carriage return, line feed or form feed. A backslash
 * followed by any other character yields that character. A single backslash at the very
 * end of the input is kept as is. The result is a regular Java string, not a byte sequence.
 *
 * @param theString the escaped text; must not be {@code null}
 * @return the decoded text
 * @throws IllegalArgumentException if a backslash-u escape is not followed by four
 *         hexadecimal digits, including when the input ends too early
 */
public static String unicodeToUtf8(String theString) {
    int len = theString.length();
    StringBuilder outBuffer = new StringBuilder(len);
    int x = 0;
    while (x < len) {
        char current = theString.charAt(x++);
        if (current != '\\') {
            outBuffer.append(current);
            continue;
        }
        if (x == len) {
            // Nothing follows the backslash: keep it rather than reading past the end.
            outBuffer.append(current);
            break;
        }
        char escape = theString.charAt(x++);
        if (escape == 'u') {
            if (len - x < 4) {
                throw new IllegalArgumentException(
                        "Malformed \\uxxxx encoding.");
            }
            int value = 0;
            for (int i = 0; i < 4; i++) {
                char hex = theString.charAt(x++);
                int digit;
                if (hex >= '0' && hex <= '9') {
                    digit = hex - '0';
                } else if (hex >= 'a' && hex <= 'f') {
                    digit = 10 + hex - 'a';
                } else if (hex >= 'A' && hex <= 'F') {
                    digit = 10 + hex - 'A';
                } else {
                    throw new IllegalArgumentException(
                            "Malformed \\uxxxx encoding.");
                }
                value = (value << 4) + digit;
            }
            outBuffer.append((char) value);
        } else {
            // Single-character escapes; anything unrecognised stands for itself.
            switch (escape) {
                case 't':
                    outBuffer.append('\t');
                    break;
                case 'r':
                    outBuffer.append('\r');
                    break;
                case 'n':
                    outBuffer.append('\n');
                    break;
                case 'f':
                    outBuffer.append('\f');
                    break;
                default:
                    outBuffer.append(escape);
                    break;
            }
        }
    }
    return outBuffer.toString();
}