mysql 藏文字典_mysql无法压缩存储表情

import java.util.regex.Matcher;
import java.util.regex.Pattern;

import org.apache.commons.lang.StringUtils;

/***

 * @ClassName: EmojiUtils
 *
 * @Description: Filters emoji out of text.
 *
 * @author: Simon
 *
 * @date: 2017-09-22 11:11:00
 */

public classEmojiUtils {public final static String unicodeReg = "[" + "\u4E00-\u9FBF" + //:CJK//统一表意符号//(CJK//Unified//Ideographs)

"\u4DC0-\u4DFF" + //:易经六十四卦符号 (Yijing Hexagrams Symbols)

"\u0000-\u007F" + //:C0控制符及基本拉丁文 (C0 Control and Basic Latin)

"\u0080-\u00FF" + //:C1控制符及拉丁:补充-1 (C1 Control and Latin 1//Supplement)

"\u0100-\u017F" + //:拉丁文扩展-A (Latin Extended-A)

"\u0180-\u024F" + //:拉丁文扩展-B (Latin Extended-B)

"\u0250-\u02AF" + //:国际音标扩展 (IPA Extensions)

"\u02B0-\u02FF" + //:空白修饰字母 (Spacing Modifiers)

"\u0300-\u036F" + //:结合用读音符号 (Combining Diacritics Marks)

"\u0370-\u03FF" + //:希腊文及科普特文 (Greek and Coptic)

"\u0400-\u04FF" + //:西里尔字母 (Cyrillic)

"\u0500-\u052F" + //:西里尔字母补充 (Cyrillic Supplement)

"\u0530-\u058F" + //:亚美尼亚语 (Armenian)

"\u0590-\u05FF" + //:希伯来文 (Hebrew)

"\u0600-\u06FF" + //:阿拉伯文 (Arabic)

"\u0700-\u074F" + //:叙利亚文 (Syriac)

"\u0750-\u077F" + //:阿拉伯文补充 (Arabic Supplement)

"\u0780-\u07BF" + //:马尔代夫语 (Thaana)//"\u07C0-\u077F"+//:西非书面语言 (N'Ko)

"\u0800-\u085F" + //:阿维斯塔语及巴列维语 (Avestan and Pahlavi)

"\u0860-\u087F" + //:Mandaic

"\u0880-\u08AF" + //:撒马利亚语 (Samaritan)

"\u0900-\u097F" + //:天城文书 (Devanagari)

"\u0980-\u09FF" + //:孟加拉语 (Bengali)

"\u0A00-\u0A7F" + //:锡克教文 (Gurmukhi)

"\u0A80-\u0AFF" + //:古吉拉特文 (Gujarati)

"\u0B00-\u0B7F" + //:奥里亚文 (Oriya)

"\u0B80-\u0BFF" + //:泰米尔文 (Tamil)

"\u0C00-\u0C7F" + //:泰卢固文 (Telugu)

"\u0C80-\u0CFF" + //:卡纳达文 (Kannada)

"\u0D00-\u0D7F" + //:德拉维族语 (Malayalam)

"\u0D80-\u0DFF" + //:僧伽罗语 (Sinhala)

"\u0E00-\u0E7F" + //:泰文 (Thai)

"\u0E80-\u0EFF" + //:老挝文 (Lao)

"\u0F00-\u0FFF" + //:藏文 (Tibetan)

"\u1000-\u109F" + //:缅甸语 (Myanmar)

"\u10A0-\u10FF" + //:格鲁吉亚语 (Georgian)

"\u1100-\u11FF" + //:朝鲜文 (Hangul Jamo)

"\u1200-\u137F" + //:埃塞俄比亚语 (Ethiopic)

"\u1380-\u139F" + //:埃塞俄比亚语补充 (Ethiopic Supplement)

"\u13A0-\u13FF" + //:切罗基语 (Cherokee)

"\u1400-\u167F" + //:统一加拿大土著语音节 (Unified Canadian Aboriginal//Syllabics)

"\u1680-\u169F" + //:欧甘字母 (Ogham)

"\u16A0-\u16FF" + //:如尼文 (Runic)

"\u1700-\u171F" + //:塔加拉语 (Tagalog)

"\u1720-\u173F" + //:Hanunóo

"\u1740-\u175F" + //:Buhid

"\u1760-\u177F" + //:Tagbanwa

"\u1780-\u17FF" + //:高棉语 (Khmer)

"\u1800-\u18AF" + //:蒙古文 (Mongolian)

"\u18B0-\u18FF" + //:Cham

"\u1900-\u194F" + //:Limbu

"\u1950-\u197F" + //:德宏泰语 (Tai Le)

"\u1980-\u19DF" + //:新傣仂语 (New Tai Lue)

"\u19E0-\u19FF" + //:高棉语记号 (Kmer Symbols)

"\u1A00-\u1A1F" + //:Buginese

"\u1A20-\u1A5F" + //:Batak

"\u1A80-\u1AEF" + //:Lanna

"\u1B00-\u1B7F" + //:巴厘语 (Balinese)

"\u1B80-\u1BB0" + //:巽他语 (Sundanese)

"\u1BC0-\u1BFF" + //:Pahawh Hmong

"\u1C00-\u1C4F" + //:雷布查语(Lepcha)

"\u1C50-\u1C7F" + //:Ol Chiki

"\u1C80-\u1CDF" + //:曼尼普尔语 (Meithei/Manipuri)

"\u1D00-\u1D7F" + //:语音学扩展 (Phone tic Extensions)

"\u1D80-\u1DBF" + //:语音学扩展补充 (Phonetic Extensions Supplement)

"\u1DC0-\u1DFF" + //结合用读音符号补充 (Combining Diacritics Marks//Supplement)

"\u1E00-\u1EFF" + //:拉丁文扩充附加 (Latin Extended Additional)

"\u1F00-\u1FFF" + //:希腊语扩充 (Greek Extended)

"\u2000-\u206F" + //:常用标点 (General Punctuation)

"\u2070-\u209F" + //:上标及下标 (Superscripts and Subscripts)

"\u20A0-\u20CF" + //:货币符号 (Currency Symbols)

"\u20D0-\u20FF" + //:组合用记号 (Combining Diacritics Marks for Symbols)

"\u2100-\u214F" + //:字母式符号 (Letterlike Symbols)

"\u2150-\u218F" + //:数字形式 (Number Form)

"\u2190-\u21FF" + //:箭头 (Arrows)

"\u2200-\u22FF" + //:数学运算符 (Mathematical Operator)

"\u2300-\u23FF" + //:杂项工业符号 (Miscellaneous Technical)

"\u2400-\u243F" + //:控制图片 (Control Pictures)

"\u2440-\u245F" + //:光学识别符 (Optical Character Recognition)

"\u2460-\u24FF" + //:封闭式字母数字 (Enclosed Alphanumerics)

"\u2500-\u257F" + //:制表符 (Box Drawing)

"\u2580-\u259F" + //:方块元素 (Block Element)

"\u25A0-\u25FF" + //:几何图形 (Geometric Shapes)

"\u2600-\u26FF" + //:杂项符号 (Miscellaneous Symbols)

"\u2700-\u27BF" + //:印刷符号 (Dingbats)

"\u27C0-\u27EF" + //:杂项数学符号-A (Miscellaneous Mathematical//Symbols-A)

"\u27F0-\u27FF" + //:追加箭头-A (Supplemental Arrows-A)

"\u2800-\u28FF" + //:盲文点字模型 (Braille Patterns)

"\u2900-\u297F" + //:追加箭头-B (Supplemental Arrows-B)

"\u2980-\u29FF" + //:杂项数学符号-B (Miscellaneous Mathematical//Symbols-B)

"\u2A00-\u2AFF" + //:追加数学运算符 (Supplemental Mathematical Operator)

"\u2B00-\u2BFF" + //:杂项符号和箭头 (Miscellaneous Symbols and Arrows)

"\u2C00-\u2C5F" + //:格拉哥里字母 (Glagolitic)

"\u2C60-\u2C7F" + //:拉丁文扩展-C (Latin Extended-C)

"\u2C80-\u2CFF" + //:古埃及语 (Coptic)

"\u2D00-\u2D2F" + //:格鲁吉亚语补充 (Georgian Supplement)

"\u2D30-\u2D7F" + //:提非纳文 (Tifinagh)

"\u2D80-\u2DDF" + //:埃塞俄比亚语扩展 (Ethiopic Extended)

"\u2E00-\u2E7F" + //:追加标点 (Supplemental Punctuation)

"\u2E80-\u2EFF" + //:CJK 部首补充 (CJK Radicals Supplement)

"\u2F00-\u2FDF" + //:康熙字典部首 (Kangxi Radicals)

"\u2FF0-\u2FFF" + //:表意文字描述符 (Ideographic Description Characters)

"\u3000-\u303F" + //:CJK 符号和标点 (CJK Symbols and Punctuation)

"\u3040-\u309F" + //:日文平假名 (Hiragana)

"\u30A0-\u30FF" + //:日文片假名 (Katakana)

"\u3100-\u312F" + //:注音字母 (Bopomofo)

"\u3130-\u318F" + //:朝鲜文兼容字母 (Hangul Compatibility Jamo)

"\u3190-\u319F" + //:象形字注释标志 (Kanbun)

"\u31A0-\u31BF" + //:注音字母扩展 (Bopomofo Extended)

"\u31C0-\u31EF" + //:CJK 笔画 (CJK Strokes)

"\u31F0-\u31FF" + //:日文片假名语音扩展 (Katakana Phonetic Extensions)

"\u3200-\u32FF" + //:封闭式 CJK 文字和月份 (Enclosed CJK Letters and//Months)

"\u3300-\u33FF" + //:CJK 兼容 (CJK Compatibility)

"\u3400-\u4DBF" + //:CJK 统一表意符号扩展 A (CJK Unified Ideographs//Extension A)

"\u4DC0-\u4DFF" + //:易经六十四卦符号 (Yijing Hexagrams Symbols)

"\u4E00-\u9FBF" + //:CJK 统一表意符号 (CJK Unified Ideographs)

"\uA000-\uA48F" + //:彝文音节 (Yi Syllables)

"\uA490-\uA4CF" + //:彝文字根 (Yi Radicals)

"\uA500-\uA61F" + //:Vai

"\uA660-\uA6FF" + //:统一加拿大土著语音节补充 (Unified Canadian Aboriginal//Syllabics Supplement)

"\uA700-\uA71F" + //:声调修饰字母 (Modifier Tone Letters)

"\uA720-\uA7FF" + //:拉丁文扩展-D (Latin Extended-D)

"\uA800-\uA82F" + //:Syloti Nagri

"\uA840-\uA87F" + //:八思巴字 (Phags-pa)

"\uA880-\uA8DF" + //:Saurashtra

"\uA900-\uA97F" + //:爪哇语 (Javanese)

"\uA980-\uA9DF" + //:Chakma

"\uAA00-\uAA3F" + //:Varang Kshiti

"\uAA40-\uAA6F" + //:Sorang Sompeng

"\uAA80-\uAADF" + //:Newari

"\uAB00-\uAB5F" + //:越南傣语 (Vi?t Thái)

"\uAB80-\uABA0" + //:Kayah Li

"\uAC00-\uD7AF" + //:朝鲜文音节 (Hangul Syllables)//"\uD800-\uDBFF"+//:High-half zone of UTF-16//"\uDC00-\uDFFF"+//:Low-half zone of UTF-16

"\uE000-\uF8FF" + //:自行使用区域 (Private Use Zone)

"\uF900-\uFAFF" + //:CJK 兼容象形文字 (CJK Compatibility Ideographs)

"\uFB00-\uFB4F" + //:字母表达形式 (Alphabetic Presentation Form)

"\uFB50-\uFDFF" + //:阿拉伯表达形式A (Arabic Presentation Form-A)

"\uFE00-\uFE0F" + //:变量选择符 (Variation Selector)

"\uFE10-\uFE1F" + //:竖排形式 (Vertical Forms)

"\uFE20-\uFE2F" + //:组合用半符号 (Combining Half Marks)

"\uFE30-\uFE4F" + //:CJK 兼容形式 (CJK Compatibility Forms)

"\uFE50-\uFE6F" + //:小型变体形式 (Small Form Variants)

"\uFE70-\uFEFF" + //:阿拉伯表达形式B (Arabic Presentation Form-B)

"\uFF00-\uFFEF" + //:半型及全型形式 (Halfwidth and Fullwidth Form)

"\uFFF0-\uFFFF]";//:特殊 (Specials);

/*** 将字符串转成unicode

*

*@paramstr

* 待转字符串

*@returnunicode字符串*/

public staticString convert(String str) {

str= (str == null ? "": str);

String tmp;

StringBuffer sb= new StringBuffer(1000);charc;inti, j;

sb.setLength(0);for (i = 0; i < str.length(); i++) {

c=str.charAt(i);

sb.append("\\u");

j= (c >>> 8); //取出高8位

tmp =Integer.toHexString(j);if (tmp.length() == 1) {

sb.append("0");

}

sb.append(tmp);

j= (c & 0xFF); //取出低8位

tmp =Integer.toHexString(j);if (tmp.length() == 1) {

sb.append("0");

}

sb.append(tmp);

}return (newString(sb).toUpperCase());

}/*** 2)unicode转成字符串,与上述过程反向操作即可 将unicode 字符串

*

*@paramstr

* 待转字符串

*@return普通字符串*/

public staticString revert(String str) {

str= (str == null ? "": str);if (str.indexOf("\\u") == -1)//如果不是unicode码则原样返回

returnstr;

StringBuffer sb= new StringBuffer(1000);for (int i = 0; i < str.length() - 6;) {

String strTemp= str.substring(i, i + 6);

String value= strTemp.substring(2);int c = 0;for (int j = 0; j < value.length(); j++) {char tempChar =value.charAt(j);int t = 0;switch(tempChar) {case 'a':

t= 10;break;case 'b':

t= 11;break;case 'c':

t= 12;break;case 'd':

t= 13;break;case 'e':

t= 14;break;case 'f':

t= 15;break;default:

t= tempChar - 48;break;

}

c+= t * ((int) Math.pow(16, (value.length() - j - 1)));

}

sb.append((char) c);

i= i + 6;

}returnsb.toString();

}/***

*@author: Simon

* @Description:传入需要过滤的字符

* @date: 2017年9月22日 上午11:11:33

*@paramstring

*@return

*/

public staticString emojiChange(String string) {//System.out.println("__________________________________");

try{//System.out.println("all-string:"+string);//System.out.println("all-unicode:"+convert(string));

Pattern pattern =Pattern.compile(unicodeReg);

StringBuffer sbBuffer= newStringBuffer();for (int i = 0; i < string.length(); i++) {char c =string.charAt(i);

String temp=String.valueOf(c);

Matcher matcher=pattern.matcher(temp);if(matcher.find()) {

sbBuffer.append(temp);

}else{

sbBuffer.append("□");

}//System.out.println("temp:"+temp+";unicode:"+convert(temp));

}//System.out.println("sb:"+sbBuffer.toString());//System.out.println("--------------------------------------");

returnsbBuffer.toString();

}catch(Exception e) {

e.printStackTrace();

}return "";

}

}

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值