Mysql保存Emoji表情报错java.sql.SQLException: Incorrect string value: ‘\xF0\x9F\x98\x8A(\xE2...‘ for column


场景

在mysql表中保存emoji表情数据时报错


报错信息

java.sql.SQLException: Incorrect string value: '\xF0\x9F\x98\x8A(\xE2...' for column

原因

mysql的utf8编码的一个字符最多3个字节,但是一个emoji表情为4个字节,所以utf8不支持存储emoji表情,导致报错


解决

方案1、将对应字段的字符编码改为:utf8mb4_unicode_ci

利:
客户端存储和请求数据的时候不需要都去做转换

弊:
1)转换编码格式可能会导致现有数据库的数据发生乱码;
2)转换编码格式之后,可能对后续的全文搜索功能有影响,大多数中文搜索引擎支持的编码格式为UTF-8;
3)若Android端的编码与数据库转换后的编码不符,将对Android端产生同样的问题。

方案2、在入库之前,将对应数据进行Emoji表情数据转换

入库前:unicode转换成htmlHexadecimal

//注:最终入库的数据是转换后的数据content
String content = EmojiUtil.parseToHtmlHexadecimal(msgData);

读取:htmlHexadecimal转换成unicode

String content = EmojiUtil.parseToUnicode(msgData);

emoji表情转换工具类

import com.vdurmont.emoji.EmojiParser;
import org.apache.commons.lang.StringUtils;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public class EmojiUtil {
    private static Logger logger = LoggerFactory.getLogger(EmojiUtil.class);

	   // 转义时标识
    private static final char unicode_separator = '&';
    private static final char unicode_prefix = 'u';
    private static final char separator = ':';

    /**
     * emoji unicode转换成htmlHexadecimal
     * @param src 问自己内容
     * @return
     */
    public static String parseToHtmlHexadecimal(String src) {
        try {
            if (StringUtils.isNotBlank(src)) {
                src = EmojiParser.parseToHtmlHexadecimal(src);
                src = escape(src);
            } else {
                return "";
            }
            return src;
        } catch (Exception e) {
            logger.error("emoji unicode转换成htmlHexadecimal出现异常,详情为:" + e.getMessage(), e);
            return escape(src);
        }
    }
    /**
     * emoji htmlHexadecimal转换成unicode
     * @param src 问自己内容
     * @return
     */
    public static String parseToUnicode(String src) {
        try {
            return EmojiParser.parseToUnicode(src);
        } catch (Exception e) {
            logger.error("emoji htmlHexadecimal转换成unicode出现异常,详情为:" + e.getMessage(), e);
            return "";
        }
    }

    private static boolean isEmojiCharacter(int codePoint) {
        return (codePoint >= 0x2600 && codePoint <= 0x27BF) // 杂项符号与符号字体
                || codePoint == 0x303D
                || codePoint == 0x2049
                || codePoint == 0x203C
                || (codePoint >= 0x2000 && codePoint <= 0x200F)//
                || (codePoint >= 0x2028 && codePoint <= 0x202F)//
                || codePoint == 0x205F //
                || (codePoint >= 0x2065 && codePoint <= 0x206F)//
                /* 标点符号占用区域 */
                || (codePoint >= 0x2100 && codePoint <= 0x214F)// 字母符号
                || (codePoint >= 0x2300 && codePoint <= 0x23FF)// 各种技术符号
                || (codePoint >= 0x2B00 && codePoint <= 0x2BFF)// 箭头A
                || (codePoint >= 0x2900 && codePoint <= 0x297F)// 箭头B
                || (codePoint >= 0x3200 && codePoint <= 0x32FF)// 中文符号
                || (codePoint >= 0xD800 && codePoint <= 0xDFFF)// 高低位替代符保留区域
                || (codePoint >= 0xE000 && codePoint <= 0xF8FF)// 私有保留区域
                || (codePoint >= 0xFE00 && codePoint <= 0xFE0F)// 变异选择器
                || codePoint >= 0x10000; // Plane在第二平面以上的,char都不可以存,全部都转
    }

    /**
     * 将带有emoji字符的字符串转换成可见字符标识
     */
    public static String escape(String src) {
        if (StringUtils.isBlank(src)) {
            return "";
        }
        int cpCount = src.codePointCount(0, src.length());
        int firCodeIndex = src.offsetByCodePoints(0, 0);
        int lstCodeIndex = src.offsetByCodePoints(0, cpCount - 1);
        StringBuilder sb = new StringBuilder(src.length());
        for (int index = firCodeIndex; index <= lstCodeIndex;index++) {
            int codepoint = src.codePointAt(index);
            if (isEmojiCharacter(codepoint)) {
                String hash = Integer.toHexString(codepoint);
                sb.append(unicode_separator).append(hash.length()).append(unicode_prefix).append(separator).append(hash);
            } else {
                sb.append((char) codepoint);
            }
        }
        return sb.toString();
    }

    /** 解析可见字符标识字符串 */
    public static String reverse(String src) {
        // 查找对应编码的标识位
        if (src == null) {
            return null;
        }
        StringBuilder sb = new StringBuilder(src.length());
        char[] sourceChar = src.toCharArray();
        int index = 0;
        while (index < sourceChar.length) {
            if (sourceChar[index] == unicode_separator) {
                if (index + 6 >= sourceChar.length) {
                    sb.append(sourceChar[index]);
                    index++;
                    continue;
                }
                // 自已的格式,与通用unicode格式不能互转
                if (sourceChar[index + 1] >= '4' && sourceChar[index + 1] <= '6' && sourceChar[index + 2] == unicode_prefix && sourceChar[index + 3] == separator) {
                    int length = Integer.parseInt(String.valueOf(sourceChar[index + 1]));
                    char[] hexchars = new char[length]; // 创建一个4至六位的数组,来存储uncode码的HEX值
                    for (int j = 0; j < length; j++) {
                        char ch = sourceChar[index + 4 + j];// 4位识别码
                        if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f')) {
                            hexchars[j] = ch;

                        } else { // 字符范围不对
                            sb.append(sourceChar[index]);
                            index++;
                            break;
                        }
                    }
                    sb.append(Character.toChars(Integer.parseInt(new String(hexchars), 16)));
                    index += (4 + length);// 4位前缀+4-6位字符码
                } else if (sourceChar[index + 1] == unicode_prefix) { // 通用字符的反转
                    // 因为第二平面之上的,已经采用了我们自己转码格式,所以这里是固定的长度4
                    char[] hexchars = new char[4];
                    for (int j = 0; j < 4; j++) {
                        char ch = sourceChar[index + 2 + j]; // 两位识别码要去掉
                        if ((ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f')) {
                            hexchars[j] = ch; // 4位识别码
                        } else { // 字符范围不对
                            sb.append(sourceChar[index]);
                            index++;
                            break;
                        }
                        sb.append(Character.toChars(Integer.parseInt(String.valueOf(hexchars), 16)));
                        index += (2 + 4);// 2位前缀+4位字符码
                    }
                } else {
                    sb.append(sourceChar[index]);
                    index++;
                    continue;
                }
            } else {
                sb.append(sourceChar[index]);
                index++;
                continue;
            }
        }

        return sb.toString();
    }

    public static String filter(String src) {
        if (src == null) {
            return null;
        }
        int cpCount = src.codePointCount(0, src.length());
        int firCodeIndex = src.offsetByCodePoints(0, 0);
        int lstCodeIndex = src.offsetByCodePoints(0, cpCount - 1);
        StringBuilder sb = new StringBuilder(src.length());
        for (int index = firCodeIndex; index <= lstCodeIndex;) {
            int codepoint = src.codePointAt(index);
            if (!isEmojiCharacter(codepoint)) {
                System.err.println("codepoint:" + Integer.toHexString(codepoint));
                sb.append((char) codepoint);
            }
            index += ((Character.isSupplementaryCodePoint(codepoint)) ? 2 : 1);

        }
        return sb.toString();
    }
}
  • 3
    点赞
  • 1
    收藏
    觉得还不错? 一键收藏
  • 0
    评论

“相关推荐”对你有帮助么?

  • 非常没帮助
  • 没帮助
  • 一般
  • 有帮助
  • 非常有帮助
提交
评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值