JNI WARNING: input is not valid Modified UTF-8: illegal start byte
在加解密过程中,得到的字节串可能包含非法的 Modified UTF-8 字节。
把这样的 char* 直接传给 NewStringUTF 会导致 VM 挂掉(出现上面的警告),所以需要在调用 NewStringUTF 之前先验证 char* 是否含有非法的 UTF-8 字节。
下面是从 VM 源码中抠出来的可用代码:
/* JNI boolean type: an alias for an unsigned 8-bit integer. */
typedef uint8_t jboolean; /* unsigned 8 bits */
验证函数用到的类型定义:
/* Unsigned 8-bit value used to hold a single raw byte. */
typedef uint8_t u1;

/*
 * Scans a NUL-terminated string and reports the first byte that is not
 * legal Modified UTF-8.
 *
 * Only the lead-byte class and the presence of the expected continuation
 * bytes (bit pattern 10xxxxxx) are checked.
 *
 * bytes:     string to validate; a NULL pointer is treated as having
 *            nothing to reject.
 * errorKind: out-parameter. Set to "start" when an illegal lead byte is
 *            found, or to "continuation" when a multi-byte sequence is
 *            missing a continuation byte. It is NOT written on success,
 *            so the caller must initialise it to NULL before the call.
 *
 * Returns the offending byte, or 0 when the string is valid. A truncated
 * multi-byte sequence also returns 0 (the offending byte is the
 * terminator), so callers must test *errorKind, not the return value.
 */
static u1 checkUtfBytes(const char* bytes, const char** errorKind) {
    if (bytes == NULL) {
        return 0;
    }

    while (*bytes != '\0') {
        u1 utf8 = (u1) *(bytes++);

        // Switch on the high four bits.
        switch (utf8 >> 4) {
        case 0x00:
        case 0x01:
        case 0x02:
        case 0x03:
        case 0x04:
        case 0x05:
        case 0x06:
        case 0x07:
            // Bit pattern 0xxx. No need for any extra bytes.
            break;
        case 0x08:
        case 0x09:
        case 0x0a:
        case 0x0b:
        case 0x0f:
            /*
             * Bit pattern 10xx or 1111, which are illegal start bytes.
             * Note: 1111 is valid for normal UTF-8, but not the
             * modified UTF-8 used here.
             */
            *errorKind = "start";
            return utf8;
        case 0x0e:
            // Bit pattern 1110, so there are two additional bytes.
            // A premature terminator fails this test, so the scan never
            // reads past the end of the string.
            utf8 = (u1) *(bytes++);
            if ((utf8 & 0xc0) != 0x80) {
                *errorKind = "continuation";
                return utf8;
            }
            // Intentional fall through to take care of the final byte.
            /* fallthrough */
        case 0x0c:
        case 0x0d:
            // Bit pattern 110x, so there is one additional byte.
            utf8 = (u1) *(bytes++);
            if ((utf8 & 0xc0) != 0x80) {
                *errorKind = "continuation";
                return utf8;
            }
            break;
        }
    }
    return 0;
}
上面就是验证函数;下面是 VM 中调用它的代码:
/* Example call site: validate `bytes` and report the first byte that is not legal Modified UTF-8. */
- const char* errorKind = NULL; // must start as NULL: checkUtfBytes only writes it when it finds a problem
- u1 utf8 = checkUtfBytes(bytes, &errorKind); // on failure, utf8 holds the offending byte
- if (errorKind != NULL) { // test errorKind, not utf8: the returned byte can be 0 even on failure
- ALOGW("JNI WARNING: input is not valid Modified UTF-8: illegal %s byte %#x", errorKind, utf8);
- ALOGW(" string: '%s'", bytes);
- showLocation(); // NOTE(review): helper not shown here; presumably logs where the bad call came from
- abortMaybe(); // NOTE(review): helper not shown here; presumably aborts the VM depending on configuration
- }
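使用方法:把 char* 字符串和 errorKind 的地址传入 checkUtfBytes(调用前先把 errorKind 初始化为 NULL)。
如果返回后 errorKind 不为 NULL,说明字符串含有非法的 Modified UTF-8 字节,此时不要再调用 NewStringUTF,而应做相应处理。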