MUTF-8(Modified UTF-8)是Android Dex文件中字符串内容的编码格式,它是对UTF-16码元的再编码,其编码代码如下:
/**
 * Writes the Modified UTF-8 form of {@code s} into {@code dst}, beginning at
 * index {@code offset}.
 *
 * <p>Each {@code char} of the string is encoded on its own: U+0001..U+007F
 * take one byte, U+0000 and U+0080..U+07FF take two bytes, and every other
 * value (surrogates included) takes three bytes. No terminator is written and
 * the number of bytes produced is not reported; the caller must make sure
 * {@code dst} has enough room after {@code offset}.
 *
 * @param dst    destination buffer
 * @param offset index in {@code dst} of the first byte to write
 * @param s      string to encode
 */
public static void encode(byte[] dst, int offset, String s) {
    int pos = offset;
    for (char unit : s.toCharArray()) {
        // Plain ASCII, except NUL, is copied through as a single byte.
        if (unit >= 1 && unit < 0x80) {
            dst[pos++] = (byte) unit;
            continue;
        }
        // U+0000 lands here as well, so it becomes the two-byte pair 0xC0 0x80.
        if (unit < 0x800) {
            int high = (unit >> 6) & 0x1f;
            int low = unit & 0x3f;
            dst[pos++] = (byte) (0xc0 | high);
            dst[pos++] = (byte) (0x80 | low);
            continue;
        }
        // Everything else is split 4 + 6 + 6 bits across three bytes.
        int top = (unit >> 12) & 0x0f;
        int middle = (unit >> 6) & 0x3f;
        int bottom = unit & 0x3f;
        dst[pos++] = (byte) (0xe0 | top);
        dst[pos++] = (byte) (0x80 | middle);
        dst[pos++] = (byte) (0x80 | bottom);
    }
}
根据上面的编码代码,写了将MUTF-8解码为UTF-16的代码,记录如下:
/**
 * Decodes Modified UTF-8 bytes into UTF-16 code units.
 *
 * <p>Every decoded code unit is written as exactly two bytes, high byte
 * first (big-endian), so the result always has an even length and can be
 * read back as UTF-16BE. The pair {@code 0xC0 0x80} decodes to U+0000.
 * Surrogates that were encoded as separate three-byte sequences come out as
 * separate code units, unchanged.
 *
 * @param src Modified UTF-8 input; must not be {@code null}
 * @return the decoded code units, two bytes each, high byte first
 * @throws IllegalArgumentException if a multi-byte sequence is cut off by the
 *         end of the input, a continuation byte is not of the form
 *         {@code 10xxxxxx}, or a byte cannot start a sequence
 */
public static Byte[] decode(byte[] src) {
    final int len = src.length;
    List<Byte> res = new ArrayList<>(len);
    int i = 0;
    while (i < len) {
        // Java bytes are signed: mask to 0..255 before classifying, otherwise
        // 0x00 and 0x7F compare as "greater than" 0xE0 and are misread as leads.
        int lead = src[i] & 0xff;
        int unit;
        if (lead < 0x80) {
            // Single byte: the value is the code unit itself (0x7F included).
            unit = lead;
            i += 1;
        } else if ((lead & 0xe0) == 0xc0) {
            // 110xxxxx 10xxxxxx -> 11 bits; also covers 0xC0 0x80 -> U+0000.
            unit = ((lead & 0x1f) << 6) | continuationBits(src, i + 1);
            i += 2;
        } else if ((lead & 0xf0) == 0xe0) {
            // 1110xxxx 10xxxxxx 10xxxxxx -> 16 bits.
            unit = ((lead & 0x0f) << 12)
                    | (continuationBits(src, i + 1) << 6)
                    | continuationBits(src, i + 2);
            i += 3;
        } else {
            // 10xxxxxx (stray continuation) or 1111xxxx (never produced by MUTF-8).
            throw new IllegalArgumentException(
                    "Invalid MUTF-8 lead byte 0x" + Integer.toHexString(lead) + " at offset " + i);
        }
        // Always emit two bytes so the output is uniform-width UTF-16BE.
        res.add((byte) (unit >> 8));
        res.add((byte) unit);
    }
    return res.toArray(new Byte[0]);
}

/** Returns the low six payload bits of the continuation byte at {@code index}, validating it. */
private static int continuationBits(byte[] src, int index) {
    if (index >= src.length) {
        throw new IllegalArgumentException("Truncated MUTF-8 sequence at offset " + index);
    }
    int value = src[index] & 0xff;
    if ((value & 0xc0) != 0x80) {
        throw new IllegalArgumentException(
                "Invalid MUTF-8 continuation byte 0x" + Integer.toHexString(value) + " at offset " + index);
    }
    return value & 0x3f;
}