public static final int MIN_SUPPLEMENTARY_CODE_POINT = 0x010000;
public static final int MAX_CODE_POINT = 0X10FFFF;
public static final int NO_UNICODE = 0xfffd;
/**
* 过滤4字长UTF-8编码字符,防止入库失败
* 4字长的UTF-8字符就是Unicode SMP(辅助平面)中的字符, 也就是Unicode编码大于U+FFFF的字符, 所以我们只需要获取字符串中各个字符的code point,
* 当code point 大于FFFF 65535时 过滤掉即可;
* 当从Unicode编码向某个字符集转换时,如果在该字符集中没有对应的编码,则得到0x3f(即问号字符?)
从其他字符集向Unicode编码转换时,如果这个二进制数在该字符集中没有标识任何的字符,则得到的结果是0xfffd65533
* @param str
* @return
*/
public static String filterUtf8mb4(String str) {
StringBuilder sb = new StringBuilder(str.length());
for (int i = 0; i
int codePoint = str.codePointAt(i);
if (codePoint >= MIN_SUPPLEMENTARY_CODE_POINT && codePoint
i++;
}else if(codePoint !=NO_UNICODE ){
sb.appendCodePoint(codePoint);
}
}
return sb.toString();
}