JNI DETECTED ERROR IN APPLICATION: input is not valid Modified UTF-8:

最新推荐文章于 2024-01-26 17:14:29 发布

Lay_Nobody

最新推荐文章于 2024-01-26 17:14:29 发布

阅读量824

点赞数

分类专栏： C++ Android 文章标签： android

本文链接：https://blog.csdn.net/guo_lei_lamant/article/details/109549733

版权

C++ 同时被 2 个专栏收录

81 篇文章 4 订阅

订阅专栏

Android

9 篇文章 0 订阅

订阅专栏

Android 源码在线查看

https://cs.android.com/

现象特征

偶现且频率极低。
危害性较大，应用直接崩溃。

问题定位

以博文题目作为关键字搜索，有大把的分析（如：https://blog.csdn.net/liuzehn/article/details/89852076），不再赘述。
我看到的几乎所有文章都指出，该问题的根源在于：错误由 Java 虚拟机内部 dalvik/vm/CheckJni.c 中的 checkUtfString 函数抛出。但我在源码中没有找到 dalvik/vm/ 这一路径，最后在 art/runtime/jni/check_jni.cc 中找到了该方法的调用。
借鉴 art/runtime/jni/check_jni.cc 中的 CheckUtfBytes 方法实现，在自己JNI层调用 NewStringUTF 之前检查字符合法性。
检测方法如下：

// Returns true when the NUL-terminated string |bytes| consists only of
// well-formed 1-, 2-, 3- or 4-byte sequences (lead byte followed by the
// right number of 10xxxxxx continuation bytes); returns false at the first
// illegal lead byte or malformed continuation byte.
bool CheckUtfBytes(const char * bytes)
{
	const uint8_t* cursor = reinterpret_cast<const uint8_t*>(bytes);
	for (;;) {
		const uint8_t lead = *cursor++;
		if (lead == 0) {
			// Reached the terminator with every sequence complete.
			return true;
		}

		// Work out how many continuation bytes the lead byte demands.
		int pending;
		if (lead < 0x80) {
			pending = 0;      // 0xxxxxxx: single byte.
		} else if (lead < 0xc0) {
			return false;     // 10xxxxxx: continuation byte in lead position.
		} else if (lead < 0xe0) {
			pending = 1;      // 110xxxxx: two-byte sequence.
		} else if (lead < 0xf0) {
			pending = 2;      // 1110xxxx: three-byte sequence.
		} else if (lead < 0xf8) {
			pending = 3;      // 11110xxx: four-byte sequence.
		} else {
			return false;     // 11111xxx: never a valid lead byte.
		}

		// A NUL here fails the mask test as well, so a truncated sequence
		// is rejected without reading beyond the terminator.
		for (; pending > 0; --pending) {
			const uint8_t follow = *cursor++;
			if ((follow & 0xc0) != 0x80) {
				return false;
			}
		}
	}
}

Android 源码 CheckUtfBytes 如下：

// Checks whether |bytes| is valid modified UTF-8. We also accept 4 byte UTF
// sequences in place of encoded surrogate pairs.
//
// |bytes| is scanned up to its NUL terminator. On success the function returns
// nullptr and leaves |*errorKind| untouched. On failure it returns a pointer to
// the offending byte and sets |*errorKind| to "start" (illegal lead byte) or
// "continuation" (a byte that should have matched 10xxxxxx but did not).
// Only the lead/continuation bit patterns are examined here.
  static const uint8_t* CheckUtfBytes(const char* bytes, const char** errorKind) {
    while (*bytes != '\0') {
      // |utf8| points at the byte under inspection; |bytes| already points past it.
      const uint8_t* utf8 = reinterpret_cast<const uint8_t*>(bytes++);
      // Switch on the high four bits.
      switch (*utf8 >> 4) {
      case 0x00:
      case 0x01:
      case 0x02:
      case 0x03:
      case 0x04:
      case 0x05:
      case 0x06:
      case 0x07:
        // Bit pattern 0xxx. No need for any extra bytes.
        break;
      case 0x08:
      case 0x09:
      case 0x0a:
      case 0x0b:
         // Bit patterns 10xx, which are illegal start bytes.
        *errorKind = "start";
        return utf8;
      case 0x0f:
        // Bit pattern 1111, which might be the start of a 4 byte sequence.
        if ((*utf8 & 0x08) == 0) {
          // Bit pattern 1111 0xxx, which is the start of a 4 byte sequence.
          // We consume one continuation byte here, and fall through to consume two more.
          utf8 = reinterpret_cast<const uint8_t*>(bytes++);
          // A NUL terminator also fails this mask test, so a truncated sequence
          // is reported here rather than read past the end of the string.
          if ((*utf8 & 0xc0) != 0x80) {
            *errorKind = "continuation";
            return utf8;
          }
        } else {
          // Bit pattern 1111 1xxx is rejected as a lead byte.
          *errorKind = "start";
          return utf8;
        }

        // Fall through to the cases below to consume two more continuation bytes.
        // NOTE(review): FALLTHROUGH_INTENDED is defined outside this snippet;
        // presumably a macro annotating deliberate fallthrough - confirm.
        FALLTHROUGH_INTENDED;
      case 0x0e:
        // Bit pattern 1110, so there are two additional bytes.
        utf8 = reinterpret_cast<const uint8_t*>(bytes++);
        if ((*utf8 & 0xc0) != 0x80) {
          *errorKind = "continuation";
          return utf8;
        }

        // Fall through to consume one more continuation byte.
        FALLTHROUGH_INTENDED;
      case 0x0c:
      case 0x0d:
        // Bit pattern 110x, so there is one additional byte.
        utf8 = reinterpret_cast<const uint8_t*>(bytes++);
        if ((*utf8 & 0xc0) != 0x80) {
          *errorKind = "continuation";
          return utf8;
        }
        break;
      }
    }
    // Every sequence was well-formed.
    return nullptr;
  }