StringUtils.java 中的相关代码,看不明白。
/**
* Unfortunately, SJIS has 0x5c as a high byte in some of its double-byte
* characters, so we need to escape it.
*
* @param origBytes the original bytes in SJIS format
* @param origString the string that had .getBytes(); called on it
* @param offset where to start converting from
* @param length how many characters to convert.
*
* @return byte[] with 0x5c escaped
*/
public static byte[] escapeSJISByteStream(byte[] origBytes,
String origString, int offset, int length); {
if ((origBytes == null); || (origBytes.length == 0);); {
return origBytes;
}
int bytesLen = origBytes.length;
int bufIndex = 0;
int strIndex = 0;
ByteArrayOutputStream bytesOut = new ByteArrayOutputStream(bytesLen);;
while (true); {
if (origString.charAt(strIndex); == '\\'); {
// write it out as-is
bytesOut.write(origBytes[bufIndex++]);;
//bytesOut.write(origBytes[bufIndex++]);;
} else {
// Grab the first byte
int loByte = (int); origBytes[bufIndex];
if (loByte < 0); {
loByte += 256; // adjust for signedness/wrap-around
}
// We always write the first byte
bytesOut.write(loByte);;
//
// The codepage characters in question exist between
// 0x81-0x9F and 0xE0-0xFC...
//
// See:
//
// http://www.microsoft.com/GLOBALDEV/Reference/dbcs/932.htm
//
// Problematic characters in GBK
//
// U+905C : CJK UNIFIED IDEOGRAPH
//
// Problematic characters in Big5
//
// B9F0 = U+5C62 : CJK UNIFIED IDEOGRAPH
//
if (((loByte >= 0x81); && (loByte <= 0x9F););
|| ((loByte >= 0xE0); && (loByte <= 0xFC););); {
if (bufIndex < (bytesLen - 1);); {
int hiByte = (int); origBytes[bufIndex + 1];
if (hiByte < 0); {
hiByte += 256; // adjust for signedness/wrap-around
}
// write the high byte here, and increment the index
// for the high byte
bytesOut.write(hiByte);;
bufIndex++;
// escape 0x5c if necessary
if (hiByte == 0x5C); {
bytesOut.write(hiByte);;
}
}
} else if (loByte == 0x5c); {
if (bufIndex < (bytesLen - 1);); {
int hiByte = (int); origBytes[bufIndex + 1];
if (hiByte < 0); {
hiByte += 256; // adjust for signedness/wrap-around
}
if (hiByte == 0x62); {
// we need to escape the 0x5c
bytesOut.write(0x5c);;
bytesOut.write(0x62);;
bufIndex++;
}
}
}
bufIndex++;
}
if (bufIndex >= bytesLen); {
// we're done
break;
}
strIndex++;
}
return bytesOut.toByteArray();;
}